• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Werror | FileCheck %s
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <x86intrin.h>
8 
// _mm_add_epi8 is expected to emit a generic <16 x i8> add.
__m128i test_mm_add_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi8
  // CHECK: add <16 x i8>
  return _mm_add_epi8(A, B);
}
14 
// _mm_add_epi16 is expected to emit a generic <8 x i16> add.
__m128i test_mm_add_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi16
  // CHECK: add <8 x i16>
  return _mm_add_epi16(A, B);
}
20 
// _mm_add_epi32 is expected to emit a generic <4 x i32> add.
__m128i test_mm_add_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi32
  // CHECK: add <4 x i32>
  return _mm_add_epi32(A, B);
}
26 
// _mm_add_epi64 is expected to emit a generic <2 x i64> add.
__m128i test_mm_add_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi64
  // CHECK: add <2 x i64>
  return _mm_add_epi64(A, B);
}
32 
// _mm_add_pd is expected to emit a generic <2 x double> fadd.
__m128d test_mm_add_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_pd
  // CHECK: fadd <2 x double>
  return _mm_add_pd(A, B);
}
38 
// _mm_add_sd is expected to emit a scalar double fadd.
__m128d test_mm_add_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_sd
  // CHECK: fadd double
  return _mm_add_sd(A, B);
}
44 
// _mm_adds_epi8 is expected to call the llvm.x86.sse2.padds.b intrinsic.
__m128i test_mm_adds_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b
  return _mm_adds_epi8(A, B);
}
50 
// _mm_adds_epi16 is expected to call the llvm.x86.sse2.padds.w intrinsic.
__m128i test_mm_adds_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w
  return _mm_adds_epi16(A, B);
}
56 
// _mm_adds_epu8 is expected to call the llvm.x86.sse2.paddus.b intrinsic.
__m128i test_mm_adds_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b
  return _mm_adds_epu8(A, B);
}
62 
// _mm_adds_epu16 is expected to call the llvm.x86.sse2.paddus.w intrinsic.
__m128i test_mm_adds_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w
  return _mm_adds_epu16(A, B);
}
68 
// _mm_and_pd is expected to emit an integer `and` on <4 x i32>.
__m128d test_mm_and_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_and_pd
  // CHECK: and <4 x i32>
  return _mm_and_pd(A, B);
}
74 
// _mm_and_si128 is expected to emit an `and` on <2 x i64>.
__m128i test_mm_and_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_and_si128
  // CHECK: and <2 x i64>
  return _mm_and_si128(A, B);
}
80 
// _mm_avg_epu8 is expected to call the llvm.x86.sse2.pavg.b intrinsic.
__m128i test_mm_avg_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b
  return _mm_avg_epu8(A, B);
}
86 
// _mm_avg_epu16 is expected to call the llvm.x86.sse2.pavg.w intrinsic.
__m128i test_mm_avg_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w
  return _mm_avg_epu16(A, B);
}
92 
// _mm_bslli_si128 with a count of 5 is expected to become a <16 x i8>
// shufflevector selecting indices 11..26.
__m128i test_mm_bslli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bslli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_bslli_si128(A, 5);
}
98 
// _mm_bsrli_si128 with a count of 5 is expected to become a <16 x i8>
// shufflevector selecting indices 5..20.
__m128i test_mm_bsrli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bsrli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_bsrli_si128(A, 5);
}
104 
// _mm_clflush is expected to call llvm.x86.sse2.clflush with an i8* argument.
void test_mm_clflush(void* A) {
  // CHECK-LABEL: test_mm_clflush
  // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}})
  _mm_clflush(A);
}
110 
// _mm_cmpeq_epi8 is expected to emit `icmp eq` on <16 x i8>.
__m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi8
  // CHECK: icmp eq <16 x i8>
  return _mm_cmpeq_epi8(A, B);
}
116 
// _mm_cmpeq_epi16 is expected to emit `icmp eq` on <8 x i16>.
__m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi16
  // CHECK: icmp eq <8 x i16>
  return _mm_cmpeq_epi16(A, B);
}
122 
// _mm_cmpeq_epi32 is expected to emit `icmp eq` on <4 x i32>.
__m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi32
  // CHECK: icmp eq <4 x i32>
  return _mm_cmpeq_epi32(A, B);
}
128 
// _mm_cmpeq_pd is expected to call llvm.x86.sse2.cmp.pd with immediate 0.
__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_pd(A, B);
}
134 
// _mm_cmpeq_sd is expected to call llvm.x86.sse2.cmp.sd with immediate 0.
__m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_sd(A, B);
}
140 
// _mm_cmpge_pd is expected to call llvm.x86.sse2.cmp.pd with immediate 2.
__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmpge_pd(A, B);
}
146 
// _mm_cmpge_sd is expected to call llvm.x86.sse2.cmp.sd with immediate 2.
__m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmpge_sd(A, B);
}
152 
// _mm_cmpgt_epi8 is expected to emit `icmp sgt` on <16 x i8>; the RUN lines
// exercise this both with and without -fno-signed-char.
__m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmpgt_epi8(A, B);
}
158 
// _mm_cmpgt_epi16 is expected to emit `icmp sgt` on <8 x i16>.
__m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmpgt_epi16(A, B);
}
164 
// _mm_cmpgt_epi32 is expected to emit `icmp sgt` on <4 x i32>.
__m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmpgt_epi32(A, B);
}
170 
// _mm_cmpgt_pd is expected to call llvm.x86.sse2.cmp.pd with immediate 1.
__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmpgt_pd(A, B);
}
176 
// _mm_cmpgt_sd is expected to call llvm.x86.sse2.cmp.sd with immediate 1.
__m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmpgt_sd(A, B);
}
182 
// _mm_cmple_pd is expected to call llvm.x86.sse2.cmp.pd with immediate 2.
__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_pd(A, B);
}
188 
// _mm_cmple_sd is expected to call llvm.x86.sse2.cmp.sd with immediate 2.
__m128d test_mm_cmple_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_sd(A, B);
}
194 
// _mm_cmplt_epi8 is expected to emit `icmp sgt` on <16 x i8> (the same
// predicate the cmpgt test looks for).
__m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmplt_epi8(A, B);
}
200 
// _mm_cmplt_epi16 is expected to emit `icmp sgt` on <8 x i16>.
__m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmplt_epi16(A, B);
}
206 
// _mm_cmplt_epi32 is expected to emit `icmp sgt` on <4 x i32>.
__m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmplt_epi32(A, B);
}
212 
// _mm_cmplt_pd is expected to call llvm.x86.sse2.cmp.pd with immediate 1.
__m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_pd(A, B);
}
218 
// _mm_cmplt_sd is expected to call llvm.x86.sse2.cmp.sd with immediate 1.
__m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_sd(A, B);
}
224 
// _mm_cmpneq_pd is expected to call llvm.x86.sse2.cmp.pd with immediate 4.
__m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_pd(A, B);
}
230 
// _mm_cmpneq_sd is expected to call llvm.x86.sse2.cmp.sd with immediate 4.
__m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_sd(A, B);
}
236 
// _mm_cmpnge_pd is expected to call llvm.x86.sse2.cmp.pd with immediate 6.
__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnge_pd(A, B);
}
242 
// _mm_cmpnge_sd is expected to call llvm.x86.sse2.cmp.sd with immediate 6.
__m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnge_sd(A, B);
}
248 
// _mm_cmpngt_pd is expected to call llvm.x86.sse2.cmp.pd with immediate 5.
__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpngt_pd(A, B);
}
254 
// _mm_cmpngt_sd is expected to call llvm.x86.sse2.cmp.sd with immediate 5.
__m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpngt_sd(A, B);
}
260 
// _mm_cmpnle_pd is expected to call llvm.x86.sse2.cmp.pd with immediate 6.
__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_pd(A, B);
}
266 
// _mm_cmpnle_sd is expected to call llvm.x86.sse2.cmp.sd with immediate 6.
__m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_sd(A, B);
}
272 
// _mm_cmpnlt_pd is expected to call llvm.x86.sse2.cmp.pd with immediate 5.
__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_pd(A, B);
}
278 
// _mm_cmpnlt_sd is expected to call llvm.x86.sse2.cmp.sd with immediate 5.
__m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_sd(A, B);
}
284 
// _mm_cmpord_pd is expected to call llvm.x86.sse2.cmp.pd with immediate 7.
__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_pd(A, B);
}
290 
// _mm_cmpord_sd is expected to call llvm.x86.sse2.cmp.sd with immediate 7.
__m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_sd(A, B);
}
296 
// _mm_cmpunord_pd is expected to call llvm.x86.sse2.cmp.pd with immediate 3.
__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_pd(A, B);
}
302 
// _mm_cmpunord_sd is expected to call llvm.x86.sse2.cmp.sd with immediate 3.
__m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_sd(A, B);
}
308 
// _mm_comieq_sd is expected to call the i32-returning llvm.x86.sse2.comieq.sd.
int test_mm_comieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comieq_sd
  // CHECK: call i32 @llvm.x86.sse2.comieq.sd
  return _mm_comieq_sd(A, B);
}
314 
// _mm_comige_sd is expected to call the i32-returning llvm.x86.sse2.comige.sd.
int test_mm_comige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comige_sd
  // CHECK: call i32 @llvm.x86.sse2.comige.sd
  return _mm_comige_sd(A, B);
}
320 
// _mm_comigt_sd is expected to call the i32-returning llvm.x86.sse2.comigt.sd.
int test_mm_comigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comigt_sd
  // CHECK: call i32 @llvm.x86.sse2.comigt.sd
  return _mm_comigt_sd(A, B);
}
326 
// _mm_comile_sd is expected to call the i32-returning llvm.x86.sse2.comile.sd.
int test_mm_comile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comile_sd
  // CHECK: call i32 @llvm.x86.sse2.comile.sd
  return _mm_comile_sd(A, B);
}
332 
// _mm_comilt_sd is expected to call the i32-returning llvm.x86.sse2.comilt.sd.
int test_mm_comilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comilt_sd
  // CHECK: call i32 @llvm.x86.sse2.comilt.sd
  return _mm_comilt_sd(A, B);
}
338 
// _mm_comineq_sd is expected to call the i32-returning llvm.x86.sse2.comineq.sd.
int test_mm_comineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comineq_sd
  // CHECK: call i32 @llvm.x86.sse2.comineq.sd
  return _mm_comineq_sd(A, B);
}
344 
// _mm_cvtepi32_pd is expected to call llvm.x86.sse2.cvtdq2pd.
__m128d test_mm_cvtepi32_pd(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd
  return _mm_cvtepi32_pd(A);
}
350 
// _mm_cvtepi32_ps is expected to call llvm.x86.sse2.cvtdq2ps.
__m128 test_mm_cvtepi32_ps(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_ps
  // CHECK: call <4 x float> @llvm.x86.sse2.cvtdq2ps
  return _mm_cvtepi32_ps(A);
}
356 
// _mm_cvtpd_epi32 is expected to call llvm.x86.sse2.cvtpd2dq.
__m128i test_mm_cvtpd_epi32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq
  return _mm_cvtpd_epi32(A);
}
362 
// _mm_cvtpd_ps is expected to call llvm.x86.sse2.cvtpd2ps.
__m128 test_mm_cvtpd_ps(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_ps
  // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps
  return _mm_cvtpd_ps(A);
}
368 
// _mm_cvtps_epi32 is expected to call llvm.x86.sse2.cvtps2dq.
__m128i test_mm_cvtps_epi32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtps_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq
  return _mm_cvtps_epi32(A);
}
374 
// _mm_cvtps_pd is expected to call llvm.x86.sse2.cvtps2pd.
__m128d test_mm_cvtps_pd(__m128 A) {
  // CHECK-LABEL: test_mm_cvtps_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd
  return _mm_cvtps_pd(A);
}
380 
// _mm_cvtsd_f64 is expected to extract element 0 of the <2 x double> input.
double test_mm_cvtsd_f64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_f64
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  return _mm_cvtsd_f64(A);
}
386 
// _mm_cvtsd_si32 is expected to call the i32-returning llvm.x86.sse2.cvtsd2si.
int test_mm_cvtsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si32
  // CHECK: call i32 @llvm.x86.sse2.cvtsd2si
  return _mm_cvtsd_si32(A);
}
392 
// _mm_cvtsd_si64 is expected to call the i64-returning llvm.x86.sse2.cvtsd2si64.
long long test_mm_cvtsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si64
  // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64
  return _mm_cvtsd_si64(A);
}
398 
// _mm_cvtsd_ss is expected to emit a scalar fptrunc from double to float.
__m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
  // CHECK-LABEL: test_mm_cvtsd_ss
  // CHECK: fptrunc double %{{.*}} to float
  return _mm_cvtsd_ss(A, B);
}
404 
// _mm_cvtsi128_si32 is expected to extract element 0 of a <4 x i32> value.
int test_mm_cvtsi128_si32(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si32
  // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
  return _mm_cvtsi128_si32(A);
}
410 
// _mm_cvtsi128_si64 is expected to extract element 0 of a <2 x i64> value.
long long test_mm_cvtsi128_si64(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  return _mm_cvtsi128_si64(A);
}
416 
// _mm_cvtsi32_sd is expected to convert the i32 with sitofp and insert the
// result into element 0 of the <2 x double>.
__m128d test_mm_cvtsi32_sd(__m128d A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_sd
  // CHECK: sitofp i32 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi32_sd(A, B);
}
423 
// _mm_cvtsi32_si128 is expected to insert the i32 into element 0 of an
// undef <4 x i32>.
__m128i test_mm_cvtsi32_si128(int A) {
  // CHECK-LABEL: test_mm_cvtsi32_si128
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  return _mm_cvtsi32_si128(A);
}
429 
// _mm_cvtsi64_sd is expected to convert the i64 with sitofp and insert the
// result into element 0 of the <2 x double>.
__m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_sd
  // CHECK: sitofp i64 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi64_sd(A, B);
}
436 
// _mm_cvtsi64_si128 is expected to insert the i64 into element 0 of an
// undef <2 x i64>.
__m128i test_mm_cvtsi64_si128(long long A) {
  // CHECK-LABEL: test_mm_cvtsi64_si128
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  return _mm_cvtsi64_si128(A);
}
442 
// _mm_cvtss_sd is expected to extract float element 0, widen it with fpext,
// and insert the double into element 0 of the result.
__m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
  // CHECK-LABEL: test_mm_cvtss_sd
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fpext float %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtss_sd(A, B);
}
450 
// _mm_cvttpd_epi32 is expected to call llvm.x86.sse2.cvttpd2dq.
__m128i test_mm_cvttpd_epi32(__m128d A) {
  // CHECK-LABEL: test_mm_cvttpd_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq
  return _mm_cvttpd_epi32(A);
}
456 
// _mm_cvttps_epi32 is expected to call llvm.x86.sse2.cvttps2dq.
__m128i test_mm_cvttps_epi32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttps_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq
  return _mm_cvttps_epi32(A);
}
462 
// _mm_cvttsd_si32 is expected to extract double element 0 and convert it
// to i32 with fptosi.
int test_mm_cvttsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvttsd_si32
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fptosi double %{{.*}} to i32
  return _mm_cvttsd_si32(A);
}
469 
// _mm_cvttsd_si64 is expected to extract double element 0 and convert it
// to i64 with fptosi.
long long test_mm_cvttsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvttsd_si64
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fptosi double %{{.*}} to i64
  return _mm_cvttsd_si64(A);
}
476 
// _mm_div_pd is expected to emit a generic <2 x double> fdiv.
__m128d test_mm_div_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_div_pd
  // CHECK: fdiv <2 x double>
  return _mm_div_pd(A, B);
}
482 
// _mm_div_sd is expected to emit a scalar double fdiv.
__m128d test_mm_div_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_div_sd
  // CHECK: fdiv double
  return _mm_div_sd(A, B);
}
488 
// Lowering to pextrw requires optimization.
// The index passed here (8) is outside 0..7; the expected IR masks the index
// with 7 before the extractelement.
int test_mm_extract_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_extract_epi16
  // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
  // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
  return _mm_extract_epi16(A, 8);
}
496 
// The index passed here (8) is outside 0..7; the expected IR masks the index
// with 7 before the insertelement.
__m128i test_mm_insert_epi16(__m128i A, short B) {
  // CHECK-LABEL: test_mm_insert_epi16
  // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
  // CHECK: insertelement <8 x i16> %{{.*}}, i32 [[x]]
  return _mm_insert_epi16(A, B, 8);
}
503 
// _mm_lfence is expected to call llvm.x86.sse2.lfence.
// Declared with (void) so the definition provides a real prototype.
void test_mm_lfence(void) {
  // CHECK-LABEL: test_mm_lfence
  // CHECK: call void @llvm.x86.sse2.lfence()
  _mm_lfence();
}
509 
// _mm_load_pd is expected to emit a 16-byte-aligned <2 x double> load.
__m128d test_mm_load_pd(double const* A) {
  // CHECK-LABEL: test_mm_load_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
  return _mm_load_pd(A);
}
515 
// _mm_load_sd is expected to emit a scalar double load with align 1.
__m128d test_mm_load_sd(double const* A) {
  // CHECK-LABEL: test_mm_load_sd
  // CHECK: load double, double* %{{.*}}, align 1
  return _mm_load_sd(A);
}
521 
// _mm_load_si128 is expected to emit a 16-byte-aligned <2 x i64> load.
__m128i test_mm_load_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_load_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
  return _mm_load_si128(A);
}
527 
// _mm_load1_pd is expected to load one double (align 8) and insert it into
// both elements of the result vector.
__m128d test_mm_load1_pd(double const* A) {
  // CHECK-LABEL: test_mm_load1_pd
  // CHECK: load double, double* %{{.*}}, align 8
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_load1_pd(A);
}
535 
// _mm_loadh_pd is expected to emit a scalar double load with exactly align 1
// (the {{$}} anchor rejects any longer alignment value such as 16).
__m128d test_mm_loadh_pd(__m128d x, void* y) {
  // CHECK-LABEL: test_mm_loadh_pd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  return _mm_loadh_pd(x, y);
}
541 
// _mm_loadr_pd is expected to emit an aligned <2 x double> load followed by
// a shufflevector that swaps the two elements.
__m128d test_mm_loadr_pd(double const* A) {
  // CHECK-LABEL: test_mm_loadr_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
  return _mm_loadr_pd(A);
}
548 
// _mm_loadu_pd is expected to emit a <2 x double> load with align 1.
__m128d test_mm_loadu_pd(double const* A) {
  // CHECK-LABEL: test_mm_loadu_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1
  return _mm_loadu_pd(A);
}
554 
// _mm_loadu_si128 is expected to emit a <2 x i64> load with align 1.
__m128i test_mm_loadu_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_loadu_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1
  return _mm_loadu_si128(A);
}
560 
// _mm_madd_epi16 is expected to call llvm.x86.sse2.pmadd.wd on two <8 x i16>
// operands, producing <4 x i32>.
__m128i test_mm_madd_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_madd_epi16
  // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_madd_epi16(A, B);
}
566 
// _mm_maskmoveu_si128 is expected to call llvm.x86.sse2.maskmov.dqu with two
// <16 x i8> operands and an i8* destination.
void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
  // CHECK-LABEL: test_mm_maskmoveu_si128
  // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}})
  _mm_maskmoveu_si128(A, B, C);
}
572 
// _mm_max_epi16 is expected to call llvm.x86.sse2.pmaxs.w.
__m128i test_mm_max_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_max_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_max_epi16(A, B);
}
578 
// _mm_max_epu8 is expected to call llvm.x86.sse2.pmaxu.b.
__m128i test_mm_max_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_max_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_max_epu8(A, B);
}
584 
// _mm_max_pd is expected to call llvm.x86.sse2.max.pd.
__m128d test_mm_max_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_pd(A, B);
}
590 
// _mm_max_sd is expected to call llvm.x86.sse2.max.sd.
__m128d test_mm_max_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_sd(A, B);
}
596 
// _mm_mfence is expected to call llvm.x86.sse2.mfence.
// Declared with (void) so the definition provides a real prototype.
void test_mm_mfence(void) {
  // CHECK-LABEL: test_mm_mfence
  // CHECK: call void @llvm.x86.sse2.mfence()
  _mm_mfence();
}
602 
// _mm_min_epi16 is expected to call llvm.x86.sse2.pmins.w.
__m128i test_mm_min_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_min_epi16(A, B);
}
608 
// _mm_min_epu8 is expected to call llvm.x86.sse2.pminu.b.
__m128i test_mm_min_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_min_epu8(A, B);
}
614 
// _mm_min_pd is expected to call llvm.x86.sse2.min.pd.
__m128d test_mm_min_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_pd(A, B);
}
620 
// _mm_min_sd is expected to call llvm.x86.sse2.min.sd.
__m128d test_mm_min_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_sd(A, B);
}
626 
// _mm_movemask_epi8 is expected to call llvm.x86.sse2.pmovmskb.128.
int test_mm_movemask_epi8(__m128i A) {
  // CHECK-LABEL: test_mm_movemask_epi8
  // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
  return _mm_movemask_epi8(A);
}
632 
// _mm_movemask_pd is expected to call llvm.x86.sse2.movmsk.pd.
int test_mm_movemask_pd(__m128d A) {
  // CHECK-LABEL: test_mm_movemask_pd
  // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
  return _mm_movemask_pd(A);
}
638 
// _mm_mul_epu32 is expected to call llvm.x86.sse2.pmulu.dq on <4 x i32>
// operands, producing <2 x i64>.
__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mul_epu32
  // CHECK: call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_mul_epu32(A, B);
}
644 
// _mm_mul_pd is expected to emit a generic <2 x double> fmul.
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_pd
  // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
  return _mm_mul_pd(A, B);
}
650 
// _mm_mul_sd is expected to emit a scalar double fmul.
__m128d test_mm_mul_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_sd
  // CHECK: fmul double %{{.*}}, %{{.*}}
  return _mm_mul_sd(A, B);
}
656 
// _mm_mulhi_epi16 is expected to call llvm.x86.sse2.pmulh.w.
__m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epi16(A, B);
}
662 
// _mm_mulhi_epu16 is expected to call llvm.x86.sse2.pmulhu.w.
__m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epu16(A, B);
}
668 
// _mm_mullo_epi16 is expected to emit a generic <8 x i16> mul.
__m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mullo_epi16
  // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
  return _mm_mullo_epi16(A, B);
}
674 
// _mm_or_pd is expected to emit an integer `or` on <4 x i32>.
__m128d test_mm_or_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_or_pd
  // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
  return _mm_or_pd(A, B);
}
680 
// _mm_or_si128 is expected to emit an `or` on <2 x i64>.
__m128i test_mm_or_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_or_si128
  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
  return _mm_or_si128(A, B);
}
686 
// _mm_packs_epi16 is expected to call llvm.x86.sse2.packsswb.128.
__m128i test_mm_packs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packs_epi16(A, B);
}
692 
// _mm_packs_epi32 is expected to call llvm.x86.sse2.packssdw.128.
__m128i test_mm_packs_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi32
  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_packs_epi32(A, B);
}
698 
// _mm_packus_epi16 is expected to call llvm.x86.sse2.packuswb.128.
__m128i test_mm_packus_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packus_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packus_epi16(A, B);
}
704 
// _mm_pause is expected to call llvm.x86.sse2.pause.
// Declared with (void) for a real prototype, and the intrinsic is invoked as
// a plain statement: ISO C does not allow `return <expr>;` in a void function.
void test_mm_pause(void) {
  // CHECK-LABEL: test_mm_pause
  // CHECK: call void @llvm.x86.sse2.pause()
  _mm_pause();
}
710 
// _mm_sad_epu8 is expected to call llvm.x86.sse2.psad.bw on <16 x i8>
// operands, producing <2 x i64>.
__m128i test_mm_sad_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sad_epu8
  // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_sad_epu8(A, B);
}
716 
// _mm_setzero_pd is expected to store a zeroinitializer <2 x double>.
// Declared with (void) so the definition provides a real prototype.
__m128d test_mm_setzero_pd(void) {
  // CHECK-LABEL: test_mm_setzero_pd
  // CHECK: store <2 x double> zeroinitializer
  return _mm_setzero_pd();
}
722 
// _mm_setzero_si128 is expected to store a zeroinitializer <2 x i64>.
// Declared with (void) so the definition provides a real prototype.
__m128i test_mm_setzero_si128(void) {
  // CHECK-LABEL: test_mm_setzero_si128
  // CHECK: store <2 x i64> zeroinitializer
  return _mm_setzero_si128();
}
728 
// _mm_shuffle_epi32 with selector 0 is expected to become a <4 x i32>
// shufflevector with an all-zero mask.
__m128i test_mm_shuffle_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_shuffle_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
  return _mm_shuffle_epi32(A, 0);
}
734 
// _mm_shuffle_pd with selector 1 is expected to become a <2 x double>
// shufflevector with mask <1, 2>.
__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_shuffle_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
  return _mm_shuffle_pd(A, B, 1);
}
740 
// _mm_shufflehi_epi16 with selector 0 is expected to keep elements 0..3 and
// fill elements 4..7 from index 4.
__m128i test_mm_shufflehi_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflehi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  return _mm_shufflehi_epi16(A, 0);
}
746 
// _mm_shufflelo_epi16 with selector 0 is expected to fill elements 0..3 from
// index 0 and keep elements 4..7.
__m128i test_mm_shufflelo_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflelo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  return _mm_shufflelo_epi16(A, 0);
}
752 
// _mm_sll_epi16 is expected to call llvm.x86.sse2.psll.w.
__m128i test_mm_sll_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w
  return _mm_sll_epi16(A, B);
}
758 
// _mm_sll_epi32 is expected to call llvm.x86.sse2.psll.d.
__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d
  return _mm_sll_epi32(A, B);
}
764 
// _mm_sll_epi64 is expected to call llvm.x86.sse2.psll.q.
__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q
  return _mm_sll_epi64(A, B);
}
770 
// _mm_slli_epi16 with an immediate count is expected to call
// llvm.x86.sse2.pslli.w.
__m128i test_mm_slli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w
  return _mm_slli_epi16(A, 1);
}
776 
// _mm_slli_epi32 with an immediate count is expected to call
// llvm.x86.sse2.pslli.d.
__m128i test_mm_slli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d
  return _mm_slli_epi32(A, 1);
}
782 
// _mm_slli_epi64 with an immediate count is expected to call
// llvm.x86.sse2.pslli.q.
__m128i test_mm_slli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q
  return _mm_slli_epi64(A, 1);
}
788 
// _mm_slli_si128 with a count of 5 is expected to become a <16 x i8>
// shufflevector selecting indices 11..26.
__m128i test_mm_slli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_slli_si128(A, 5);
}
794 
// Codegen test for _mm_sqrt_pd(A): the emitted IR is expected to contain a
// call to llvm.x86.sse2.sqrt.pd taking and returning <2 x double>.
test_mm_sqrt_pd(__m128d A)795 __m128d test_mm_sqrt_pd(__m128d A) {
796   // CHECK-LABEL: test_mm_sqrt_pd
797   // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %{{.*}})
798   return _mm_sqrt_pd(A);
799 }
800 
// Codegen test for _mm_sqrt_sd(A, B): the emitted IR is expected to contain a
// call to llvm.x86.sse2.sqrt.sd with a <2 x double> argument. Only the call
// itself is matched; how the result is combined with the other operand is not
// verified here.
test_mm_sqrt_sd(__m128d A,__m128d B)801 __m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
802   // CHECK-LABEL: test_mm_sqrt_sd
803   // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %{{.*}})
804   return _mm_sqrt_sd(A, B);
805 }
806 
// Codegen test for _mm_sra_epi16(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.psra.w intrinsic returning <8 x i16>.
test_mm_sra_epi16(__m128i A,__m128i B)807 __m128i test_mm_sra_epi16(__m128i A, __m128i B) {
808   // CHECK-LABEL: test_mm_sra_epi16
809   // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w
810   return _mm_sra_epi16(A, B);
811 }
812 
// Codegen test for _mm_sra_epi32(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.psra.d intrinsic returning <4 x i32>.
test_mm_sra_epi32(__m128i A,__m128i B)813 __m128i test_mm_sra_epi32(__m128i A, __m128i B) {
814   // CHECK-LABEL: test_mm_sra_epi32
815   // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d
816   return _mm_sra_epi32(A, B);
817 }
818 
// Codegen test for _mm_srai_epi16 with a constant count of 1: the emitted IR
// is expected to contain a call to llvm.x86.sse2.psrai.w returning <8 x i16>.
test_mm_srai_epi16(__m128i A)819 __m128i test_mm_srai_epi16(__m128i A) {
820   // CHECK-LABEL: test_mm_srai_epi16
821   // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w
822   return _mm_srai_epi16(A, 1);
823 }
824 
// Codegen test for _mm_srai_epi32 with a constant count of 1: the emitted IR
// is expected to contain a call to llvm.x86.sse2.psrai.d returning <4 x i32>.
test_mm_srai_epi32(__m128i A)825 __m128i test_mm_srai_epi32(__m128i A) {
826   // CHECK-LABEL: test_mm_srai_epi32
827   // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d
828   return _mm_srai_epi32(A, 1);
829 }
830 
// Codegen test for _mm_srl_epi16(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.psrl.w intrinsic returning <8 x i16>.
test_mm_srl_epi16(__m128i A,__m128i B)831 __m128i test_mm_srl_epi16(__m128i A, __m128i B) {
832   // CHECK-LABEL: test_mm_srl_epi16
833   // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w
834   return _mm_srl_epi16(A, B);
835 }
836 
// Codegen test for _mm_srl_epi32(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.psrl.d intrinsic returning <4 x i32>.
test_mm_srl_epi32(__m128i A,__m128i B)837 __m128i test_mm_srl_epi32(__m128i A, __m128i B) {
838   // CHECK-LABEL: test_mm_srl_epi32
839   // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d
840   return _mm_srl_epi32(A, B);
841 }
842 
// Codegen test for _mm_srl_epi64(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.psrl.q intrinsic returning <2 x i64>.
test_mm_srl_epi64(__m128i A,__m128i B)843 __m128i test_mm_srl_epi64(__m128i A, __m128i B) {
844   // CHECK-LABEL: test_mm_srl_epi64
845   // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q
846   return _mm_srl_epi64(A, B);
847 }
848 
// Codegen test for _mm_srli_epi16 with a constant count of 1: the emitted IR
// is expected to contain a call to llvm.x86.sse2.psrli.w returning <8 x i16>.
test_mm_srli_epi16(__m128i A)849 __m128i test_mm_srli_epi16(__m128i A) {
850   // CHECK-LABEL: test_mm_srli_epi16
851   // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w
852   return _mm_srli_epi16(A, 1);
853 }
854 
// Codegen test for _mm_srli_epi32 with a constant count of 1: the emitted IR
// is expected to contain a call to llvm.x86.sse2.psrli.d returning <4 x i32>.
test_mm_srli_epi32(__m128i A)855 __m128i test_mm_srli_epi32(__m128i A) {
856   // CHECK-LABEL: test_mm_srli_epi32
857   // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d
858   return _mm_srli_epi32(A, 1);
859 }
860 
// Codegen test for _mm_srli_epi64 with a constant count of 1: the emitted IR
// is expected to contain a call to llvm.x86.sse2.psrli.q returning <2 x i64>.
test_mm_srli_epi64(__m128i A)861 __m128i test_mm_srli_epi64(__m128i A) {
862   // CHECK-LABEL: test_mm_srli_epi64
863   // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q
864   return _mm_srli_epi64(A, 1);
865 }
866 
// Codegen test for _mm_srli_si128 with a byte count of 5. No target intrinsic
// call is expected; the pattern requires a <16 x i8> shufflevector whose mask
// runs through indices 5..20 in ascending order.
test_mm_srli_si128(__m128i A)867 __m128i test_mm_srli_si128(__m128i A) {
868   // CHECK-LABEL: test_mm_srli_si128
869   // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
870   return _mm_srli_si128(A, 5);
871 }
872 
// Codegen test for _mm_store_pd(A, B): the emitted IR is expected to contain
// a <2 x double> store with 16-byte alignment.
// NOTE(review): the pattern accepts any <2 x double> store with align 16, so
// an unoptimized spill of the B argument could presumably satisfy it before
// the intrinsic's own store is reached -- confirm against the generated IR.
test_mm_store_pd(double * A,__m128d B)873 void test_mm_store_pd(double* A, __m128d B) {
874   // CHECK-LABEL: test_mm_store_pd
875   // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
876   _mm_store_pd(A, B);
877 }
878 
// Codegen test for _mm_store_sd(A, B): the emitted IR is expected to contain
// a scalar double store with 1-byte alignment. The trailing {{$}} anchors the
// match at end of line, so a store with "align 16" cannot satisfy it.
test_mm_store_sd(double * A,__m128d B)879 void test_mm_store_sd(double* A, __m128d B) {
880   // CHECK-LABEL: test_mm_store_sd
881   // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
882   _mm_store_sd(A, B);
883 }
884 
// Codegen test for _mm_store_si128(A, B): the emitted IR is expected to
// contain a <2 x i64> store with 16-byte alignment.
// NOTE(review): the pattern accepts any <2 x i64> store with align 16, so an
// unoptimized spill of the B argument could presumably satisfy it before the
// intrinsic's own store is reached -- confirm against the generated IR.
test_mm_store_si128(__m128i * A,__m128i B)885 void test_mm_store_si128(__m128i* A, __m128i B) {
886   // CHECK-LABEL: test_mm_store_si128
887   // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
888   _mm_store_si128(A, B);
889 }
890 
// Codegen test for _mm_storeh_pd(A, B): the emitted IR must contain a scalar
// double store with 1-byte alignment. The pattern is end-anchored with {{$}}
// because the text "align 1" is a prefix of "align 16"; without the anchor a
// store emitted with 16-byte alignment would be accepted as well. This matches
// the anchoring already used by test_mm_store_sd.
void test_mm_storeh_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storeh_pd
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storeh_pd(A, B);
}
896 
// Codegen test for _mm_storel_pd(A, B): the emitted IR must contain a scalar
// double store with 1-byte alignment. The pattern is end-anchored with {{$}}
// because the text "align 1" is a prefix of "align 16"; without the anchor a
// store emitted with 16-byte alignment would be accepted as well. This matches
// the anchoring already used by test_mm_store_sd.
void test_mm_storel_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storel_pd
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storel_pd(A, B);
}
902 
// Codegen test for the unaligned store _mm_storeu_pd(A, B): the emitted IR
// must contain a <2 x double> store with 1-byte alignment. The pattern is
// end-anchored with {{$}} because the text "align 1" is a prefix of
// "align 16"; without the anchor any 16-byte aligned <2 x double> store would
// satisfy the pattern and the test could not tell the two forms apart.
void test_mm_storeu_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storeu_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1{{$}}
  _mm_storeu_pd(A, B);
}
908 
// Codegen test for the unaligned store _mm_storeu_si128(A, B): the emitted IR
// must contain a <2 x i64> store with 1-byte alignment. The pattern is
// end-anchored with {{$}} because the text "align 1" is a prefix of
// "align 16"; without the anchor any 16-byte aligned <2 x i64> store would
// satisfy the pattern and the test could not tell the two forms apart.
void test_mm_storeu_si128(__m128i* A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
  _mm_storeu_si128(A, B);
}
914 
// Codegen test for _mm_stream_pd(A, B): the emitted IR is expected to contain
// a <2 x double> store with align 16 that carries !nontemporal metadata.
test_mm_stream_pd(double * A,__m128d B)915 void test_mm_stream_pd(double *A, __m128d B) {
916   // CHECK-LABEL: test_mm_stream_pd
917   // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
918   _mm_stream_pd(A, B);
919 }
920 
// Codegen test for _mm_stream_si32(A, B): the emitted IR is expected to
// contain an i32 store with align 1 that carries !nontemporal metadata.
test_mm_stream_si32(int * A,int B)921 void test_mm_stream_si32(int *A, int B) {
922   // CHECK-LABEL: test_mm_stream_si32
923   // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal
924   _mm_stream_si32(A, B);
925 }
926 
// Codegen test for _mm_stream_si64(A, B): the emitted IR is expected to
// contain an i64 store with align 1 that carries !nontemporal metadata.
test_mm_stream_si64(long long * A,long long B)927 void test_mm_stream_si64(long long *A, long long B) {
928   // CHECK-LABEL: test_mm_stream_si64
929   // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal
930   _mm_stream_si64(A, B);
931 }
932 
// Codegen test for _mm_stream_si128(A, B): the emitted IR is expected to
// contain a <2 x i64> store with align 16 that carries !nontemporal metadata.
test_mm_stream_si128(__m128i * A,__m128i B)933 void test_mm_stream_si128(__m128i *A, __m128i B) {
934   // CHECK-LABEL: test_mm_stream_si128
935   // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal
936   _mm_stream_si128(A, B);
937 }
938 
// Codegen test for _mm_sub_epi8(A, B): expected to lower to a plain IR `sub`
// on <16 x i8> rather than a target intrinsic call.
test_mm_sub_epi8(__m128i A,__m128i B)939 __m128i test_mm_sub_epi8(__m128i A, __m128i B) {
940   // CHECK-LABEL: test_mm_sub_epi8
941   // CHECK: sub <16 x i8>
942   return _mm_sub_epi8(A, B);
943 }
944 
// Codegen test for _mm_sub_epi16(A, B): expected to lower to a plain IR `sub`
// on <8 x i16> rather than a target intrinsic call.
test_mm_sub_epi16(__m128i A,__m128i B)945 __m128i test_mm_sub_epi16(__m128i A, __m128i B) {
946   // CHECK-LABEL: test_mm_sub_epi16
947   // CHECK: sub <8 x i16>
948   return _mm_sub_epi16(A, B);
949 }
950 
// Codegen test for _mm_sub_epi32(A, B): expected to lower to a plain IR `sub`
// on <4 x i32> rather than a target intrinsic call.
test_mm_sub_epi32(__m128i A,__m128i B)951 __m128i test_mm_sub_epi32(__m128i A, __m128i B) {
952   // CHECK-LABEL: test_mm_sub_epi32
953   // CHECK: sub <4 x i32>
954   return _mm_sub_epi32(A, B);
955 }
956 
// Codegen test for _mm_sub_epi64(A, B): expected to lower to a plain IR `sub`
// on <2 x i64> rather than a target intrinsic call.
test_mm_sub_epi64(__m128i A,__m128i B)957 __m128i test_mm_sub_epi64(__m128i A, __m128i B) {
958   // CHECK-LABEL: test_mm_sub_epi64
959   // CHECK: sub <2 x i64>
960   return _mm_sub_epi64(A, B);
961 }
962 
// Codegen test for _mm_sub_pd(A, B): expected to lower to a plain IR `fsub`
// on <2 x double> rather than a target intrinsic call.
test_mm_sub_pd(__m128d A,__m128d B)963 __m128d test_mm_sub_pd(__m128d A, __m128d B) {
964   // CHECK-LABEL: test_mm_sub_pd
965   // CHECK: fsub <2 x double>
966   return _mm_sub_pd(A, B);
967 }
968 
// Codegen test for _mm_sub_sd(A, B): expected to lower to a scalar IR
// `fsub double` rather than a target intrinsic call.
test_mm_sub_sd(__m128d A,__m128d B)969 __m128d test_mm_sub_sd(__m128d A, __m128d B) {
970   // CHECK-LABEL: test_mm_sub_sd
971   // CHECK: fsub double
972   return _mm_sub_sd(A, B);
973 }
974 
// Codegen test for _mm_subs_epi8(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.psubs.b intrinsic returning <16 x i8>.
test_mm_subs_epi8(__m128i A,__m128i B)975 __m128i test_mm_subs_epi8(__m128i A, __m128i B) {
976   // CHECK-LABEL: test_mm_subs_epi8
977   // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b
978   return _mm_subs_epi8(A, B);
979 }
980 
// Codegen test for _mm_subs_epi16(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.psubs.w intrinsic returning <8 x i16>.
test_mm_subs_epi16(__m128i A,__m128i B)981 __m128i test_mm_subs_epi16(__m128i A, __m128i B) {
982   // CHECK-LABEL: test_mm_subs_epi16
983   // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w
984   return _mm_subs_epi16(A, B);
985 }
986 
// Codegen test for _mm_subs_epu8(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.psubus.b intrinsic returning <16 x i8>.
test_mm_subs_epu8(__m128i A,__m128i B)987 __m128i test_mm_subs_epu8(__m128i A, __m128i B) {
988   // CHECK-LABEL: test_mm_subs_epu8
989   // CHECK: call <16 x i8> @llvm.x86.sse2.psubus.b
990   return _mm_subs_epu8(A, B);
991 }
992 
// Codegen test for _mm_subs_epu16(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.psubus.w intrinsic returning <8 x i16>.
test_mm_subs_epu16(__m128i A,__m128i B)993 __m128i test_mm_subs_epu16(__m128i A, __m128i B) {
994   // CHECK-LABEL: test_mm_subs_epu16
995   // CHECK: call <8 x i16> @llvm.x86.sse2.psubus.w
996   return _mm_subs_epu16(A, B);
997 }
998 
// Codegen test for _mm_ucomieq_sd(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.ucomieq.sd intrinsic returning i32.
test_mm_ucomieq_sd(__m128d A,__m128d B)999 int test_mm_ucomieq_sd(__m128d A, __m128d B) {
1000   // CHECK-LABEL: test_mm_ucomieq_sd
1001   // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd
1002   return _mm_ucomieq_sd(A, B);
1003 }
1004 
// Codegen test for _mm_ucomige_sd(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.ucomige.sd intrinsic returning i32.
test_mm_ucomige_sd(__m128d A,__m128d B)1005 int test_mm_ucomige_sd(__m128d A, __m128d B) {
1006   // CHECK-LABEL: test_mm_ucomige_sd
1007   // CHECK: call i32 @llvm.x86.sse2.ucomige.sd
1008   return _mm_ucomige_sd(A, B);
1009 }
1010 
// Codegen test for _mm_ucomigt_sd(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.ucomigt.sd intrinsic returning i32.
test_mm_ucomigt_sd(__m128d A,__m128d B)1011 int test_mm_ucomigt_sd(__m128d A, __m128d B) {
1012   // CHECK-LABEL: test_mm_ucomigt_sd
1013   // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd
1014   return _mm_ucomigt_sd(A, B);
1015 }
1016 
// Codegen test for _mm_ucomile_sd(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.ucomile.sd intrinsic returning i32.
test_mm_ucomile_sd(__m128d A,__m128d B)1017 int test_mm_ucomile_sd(__m128d A, __m128d B) {
1018   // CHECK-LABEL: test_mm_ucomile_sd
1019   // CHECK: call i32 @llvm.x86.sse2.ucomile.sd
1020   return _mm_ucomile_sd(A, B);
1021 }
1022 
// Codegen test for _mm_ucomilt_sd(A, B): the emitted IR is expected to contain
// a call to the llvm.x86.sse2.ucomilt.sd intrinsic returning i32.
test_mm_ucomilt_sd(__m128d A,__m128d B)1023 int test_mm_ucomilt_sd(__m128d A, __m128d B) {
1024   // CHECK-LABEL: test_mm_ucomilt_sd
1025   // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd
1026   return _mm_ucomilt_sd(A, B);
1027 }
1028 
// Codegen test for _mm_ucomineq_sd(A, B): the emitted IR is expected to
// contain a call to the llvm.x86.sse2.ucomineq.sd intrinsic returning i32.
test_mm_ucomineq_sd(__m128d A,__m128d B)1029 int test_mm_ucomineq_sd(__m128d A, __m128d B) {
1030   // CHECK-LABEL: test_mm_ucomineq_sd
1031   // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd
1032   return _mm_ucomineq_sd(A, B);
1033 }
1034 
// Codegen test for _mm_unpackhi_epi8(A, B): expected to lower to a <16 x i8>
// shufflevector whose mask alternates indices 8..15 with indices 24..31
// (8, 24, 9, 25, ...).
test_mm_unpackhi_epi8(__m128i A,__m128i B)1035 __m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
1036   // CHECK-LABEL: test_mm_unpackhi_epi8
1037   // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1038   return _mm_unpackhi_epi8(A, B);
1039 }
1040 
// Codegen test for _mm_unpackhi_epi16(A, B): expected to lower to an
// <8 x i16> shufflevector whose mask alternates indices 4..7 with indices
// 12..15 (4, 12, 5, 13, ...).
test_mm_unpackhi_epi16(__m128i A,__m128i B)1041 __m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
1042   // CHECK-LABEL: test_mm_unpackhi_epi16
1043   // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1044   return _mm_unpackhi_epi16(A, B);
1045 }
1046 
// Codegen test for _mm_unpackhi_epi32(A, B): expected to lower to a
// <4 x i32> shufflevector with mask <2, 6, 3, 7>.
test_mm_unpackhi_epi32(__m128i A,__m128i B)1047 __m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
1048   // CHECK-LABEL: test_mm_unpackhi_epi32
1049   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1050   return _mm_unpackhi_epi32(A, B);
1051 }
1052 
// Codegen test for _mm_unpackhi_epi64(A, B): expected to lower to a
// <2 x i64> shufflevector with mask <1, 3>.
test_mm_unpackhi_epi64(__m128i A,__m128i B)1053 __m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
1054   // CHECK-LABEL: test_mm_unpackhi_epi64
1055   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
1056   return _mm_unpackhi_epi64(A, B);
1057 }
1058 
// Codegen test for _mm_unpackhi_pd(A, B): expected to lower to a
// <2 x double> shufflevector with mask <1, 3>.
test_mm_unpackhi_pd(__m128d A,__m128d B)1059 __m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
1060   // CHECK-LABEL: test_mm_unpackhi_pd
1061   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
1062   return _mm_unpackhi_pd(A, B);
1063 }
1064 
// Codegen test for _mm_unpacklo_epi8(A, B): expected to lower to a <16 x i8>
// shufflevector whose mask alternates indices 0..7 with indices 16..23
// (0, 16, 1, 17, ...).
test_mm_unpacklo_epi8(__m128i A,__m128i B)1065 __m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
1066   // CHECK-LABEL: test_mm_unpacklo_epi8
1067   // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1068   return _mm_unpacklo_epi8(A, B);
1069 }
1070 
// Codegen test for _mm_unpacklo_epi16(A, B): expected to lower to an
// <8 x i16> shufflevector whose mask alternates indices 0..3 with indices
// 8..11 (0, 8, 1, 9, ...).
test_mm_unpacklo_epi16(__m128i A,__m128i B)1071 __m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
1072   // CHECK-LABEL: test_mm_unpacklo_epi16
1073   // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1074   return _mm_unpacklo_epi16(A, B);
1075 }
1076 
// Codegen test for _mm_unpacklo_epi32(A, B): expected to lower to a
// <4 x i32> shufflevector with mask <0, 4, 1, 5>.
test_mm_unpacklo_epi32(__m128i A,__m128i B)1077 __m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
1078   // CHECK-LABEL: test_mm_unpacklo_epi32
1079   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1080   return _mm_unpacklo_epi32(A, B);
1081 }
1082 
// Codegen test for _mm_unpacklo_epi64(A, B): expected to lower to a
// <2 x i64> shufflevector with mask <0, 2>.
test_mm_unpacklo_epi64(__m128i A,__m128i B)1083 __m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
1084   // CHECK-LABEL: test_mm_unpacklo_epi64
1085   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
1086   return _mm_unpacklo_epi64(A, B);
1087 }
1088 
// Codegen test for _mm_unpacklo_pd(A, B): expected to lower to a
// <2 x double> shufflevector with mask <0, 2>.
test_mm_unpacklo_pd(__m128d A,__m128d B)1089 __m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
1090   // CHECK-LABEL: test_mm_unpacklo_pd
1091   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
1092   return _mm_unpacklo_pd(A, B);
1093 }
1094 
// Codegen test for _mm_xor_pd(A, B): expected to lower to a plain IR `xor`.
// Although the arguments are __m128d, the pattern requires the xor to operate
// on <4 x i32> values.
test_mm_xor_pd(__m128d A,__m128d B)1095 __m128d test_mm_xor_pd(__m128d A, __m128d B) {
1096   // CHECK-LABEL: test_mm_xor_pd
1097   // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
1098   return _mm_xor_pd(A, B);
1099 }
1100 
// Codegen test for _mm_xor_si128(A, B): expected to lower to a plain IR `xor`
// on <2 x i64> values rather than a target intrinsic call.
test_mm_xor_si128(__m128i A,__m128i B)1101 __m128i test_mm_xor_si128(__m128i A, __m128i B) {
1102   // CHECK-LABEL: test_mm_xor_si128
1103   // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
1104   return _mm_xor_si128(A, B);
1105 }
1106