1 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Werror | FileCheck %s
2 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
3
4 // Don't include mm_malloc.h, it's system specific.
5 #define __MM_MALLOC_H
6
7 #include <x86intrin.h>
8
// _mm_add_epi8 should be emitted as a generic IR add on <16 x i8>.
__m128i test_mm_add_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi8
  // CHECK: add <16 x i8>
  return _mm_add_epi8(A, B);
}
14
// _mm_add_epi16 should be emitted as a generic IR add on <8 x i16>.
__m128i test_mm_add_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi16
  // CHECK: add <8 x i16>
  return _mm_add_epi16(A, B);
}
20
// _mm_add_epi32 should be emitted as a generic IR add on <4 x i32>.
__m128i test_mm_add_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi32
  // CHECK: add <4 x i32>
  return _mm_add_epi32(A, B);
}
26
// _mm_add_epi64 should be emitted as a generic IR add on <2 x i64>.
__m128i test_mm_add_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi64
  // CHECK: add <2 x i64>
  return _mm_add_epi64(A, B);
}
32
// _mm_add_pd should be emitted as a generic IR fadd on <2 x double>.
__m128d test_mm_add_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_pd
  // CHECK: fadd <2 x double>
  return _mm_add_pd(A, B);
}
38
// _mm_add_sd should be emitted as a scalar IR fadd on double.
__m128d test_mm_add_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_sd
  // CHECK: fadd double
  return _mm_add_sd(A, B);
}
44
// _mm_adds_epi8 should call the sse2.padds.b intrinsic.
__m128i test_mm_adds_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b
  return _mm_adds_epi8(A, B);
}
50
// _mm_adds_epi16 should call the sse2.padds.w intrinsic.
__m128i test_mm_adds_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w
  return _mm_adds_epi16(A, B);
}
56
// _mm_adds_epu8 should call the sse2.paddus.b intrinsic.
__m128i test_mm_adds_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b
  return _mm_adds_epu8(A, B);
}
62
// _mm_adds_epu16 should call the sse2.paddus.w intrinsic.
__m128i test_mm_adds_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w
  return _mm_adds_epu16(A, B);
}
68
// _mm_and_pd should be emitted as an integer IR and on <4 x i32>.
__m128d test_mm_and_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_and_pd
  // CHECK: and <4 x i32>
  return _mm_and_pd(A, B);
}
74
// _mm_and_si128 should be emitted as an IR and on <2 x i64>.
__m128i test_mm_and_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_and_si128
  // CHECK: and <2 x i64>
  return _mm_and_si128(A, B);
}
80
// _mm_avg_epu8 should call the sse2.pavg.b intrinsic.
__m128i test_mm_avg_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b
  return _mm_avg_epu8(A, B);
}
86
// _mm_avg_epu16 should call the sse2.pavg.w intrinsic.
__m128i test_mm_avg_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w
  return _mm_avg_epu16(A, B);
}
92
// _mm_bslli_si128 by 5 should become a byte shufflevector with indices 11..26.
__m128i test_mm_bslli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bslli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_bslli_si128(A, 5);
}
98
// _mm_bsrli_si128 by 5 should become a byte shufflevector with indices 5..20.
__m128i test_mm_bsrli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bsrli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_bsrli_si128(A, 5);
}
104
// _mm_clflush should call the sse2.clflush intrinsic with an i8* argument.
void test_mm_clflush(void* A) {
  // CHECK-LABEL: test_mm_clflush
  // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}})
  _mm_clflush(A);
}
110
// _mm_cmpeq_epi8 should be emitted as icmp eq on <16 x i8>.
__m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi8
  // CHECK: icmp eq <16 x i8>
  return _mm_cmpeq_epi8(A, B);
}
116
// _mm_cmpeq_epi16 should be emitted as icmp eq on <8 x i16>.
__m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi16
  // CHECK: icmp eq <8 x i16>
  return _mm_cmpeq_epi16(A, B);
}
122
// _mm_cmpeq_epi32 should be emitted as icmp eq on <4 x i32>.
__m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi32
  // CHECK: icmp eq <4 x i32>
  return _mm_cmpeq_epi32(A, B);
}
128
// _mm_cmpeq_pd should call sse2.cmp.pd with predicate immediate 0.
__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_pd(A, B);
}
134
// _mm_cmpeq_sd should call sse2.cmp.sd with predicate immediate 0.
__m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_sd(A, B);
}
140
// _mm_cmpge_pd should call sse2.cmp.pd with predicate immediate 2.
__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmpge_pd(A, B);
}
146
// _mm_cmpge_sd should call sse2.cmp.sd with predicate immediate 2.
__m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmpge_sd(A, B);
}
152
// _mm_cmpgt_epi8 should be emitted as icmp sgt on <16 x i8>; the second RUN
// line repeats this with -fno-signed-char.
__m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmpgt_epi8(A, B);
}
158
// _mm_cmpgt_epi16 should be emitted as icmp sgt on <8 x i16>.
__m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmpgt_epi16(A, B);
}
164
// _mm_cmpgt_epi32 should be emitted as icmp sgt on <4 x i32>.
__m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmpgt_epi32(A, B);
}
170
// _mm_cmpgt_pd should call sse2.cmp.pd with predicate immediate 1.
__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmpgt_pd(A, B);
}
176
// _mm_cmpgt_sd should call sse2.cmp.sd with predicate immediate 1.
__m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmpgt_sd(A, B);
}
182
// _mm_cmple_pd should call sse2.cmp.pd with predicate immediate 2.
__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_pd(A, B);
}
188
// _mm_cmple_sd should call sse2.cmp.sd with predicate immediate 2.
__m128d test_mm_cmple_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_sd(A, B);
}
194
// _mm_cmplt_epi8 is expected to appear as icmp sgt on <16 x i8> (the same
// predicate the cmpgt test matches).
__m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmplt_epi8(A, B);
}
200
// _mm_cmplt_epi16 is expected to appear as icmp sgt on <8 x i16>.
__m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmplt_epi16(A, B);
}
206
// _mm_cmplt_epi32 is expected to appear as icmp sgt on <4 x i32>.
__m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmplt_epi32(A, B);
}
212
// _mm_cmplt_pd should call sse2.cmp.pd with predicate immediate 1.
__m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_pd(A, B);
}
218
// _mm_cmplt_sd should call sse2.cmp.sd with predicate immediate 1.
__m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_sd(A, B);
}
224
// _mm_cmpneq_pd should call sse2.cmp.pd with predicate immediate 4.
__m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_pd(A, B);
}
230
// _mm_cmpneq_sd should call sse2.cmp.sd with predicate immediate 4.
__m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_sd(A, B);
}
236
// _mm_cmpnge_pd should call sse2.cmp.pd with predicate immediate 6.
__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnge_pd(A, B);
}
242
// _mm_cmpnge_sd should call sse2.cmp.sd with predicate immediate 6.
__m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnge_sd(A, B);
}
248
// _mm_cmpngt_pd should call sse2.cmp.pd with predicate immediate 5.
__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpngt_pd(A, B);
}
254
// _mm_cmpngt_sd should call sse2.cmp.sd with predicate immediate 5.
__m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpngt_sd(A, B);
}
260
// _mm_cmpnle_pd should call sse2.cmp.pd with predicate immediate 6.
__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_pd(A, B);
}
266
// _mm_cmpnle_sd should call sse2.cmp.sd with predicate immediate 6.
__m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_sd(A, B);
}
272
// _mm_cmpnlt_pd should call sse2.cmp.pd with predicate immediate 5.
__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_pd(A, B);
}
278
// _mm_cmpnlt_sd should call sse2.cmp.sd with predicate immediate 5.
__m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_sd(A, B);
}
284
// _mm_cmpord_pd should call sse2.cmp.pd with predicate immediate 7.
__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_pd(A, B);
}
290
// _mm_cmpord_sd should call sse2.cmp.sd with predicate immediate 7.
__m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_sd(A, B);
}
296
// _mm_cmpunord_pd should call sse2.cmp.pd with predicate immediate 3.
__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_pd(A, B);
}
302
// _mm_cmpunord_sd should call sse2.cmp.sd with predicate immediate 3.
__m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_sd(A, B);
}
308
// _mm_comieq_sd should call the sse2.comieq.sd intrinsic returning i32.
int test_mm_comieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comieq_sd
  // CHECK: call i32 @llvm.x86.sse2.comieq.sd
  return _mm_comieq_sd(A, B);
}
314
// _mm_comige_sd should call the sse2.comige.sd intrinsic returning i32.
int test_mm_comige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comige_sd
  // CHECK: call i32 @llvm.x86.sse2.comige.sd
  return _mm_comige_sd(A, B);
}
320
// _mm_comigt_sd should call the sse2.comigt.sd intrinsic returning i32.
int test_mm_comigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comigt_sd
  // CHECK: call i32 @llvm.x86.sse2.comigt.sd
  return _mm_comigt_sd(A, B);
}
326
// _mm_comile_sd should call the sse2.comile.sd intrinsic returning i32.
int test_mm_comile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comile_sd
  // CHECK: call i32 @llvm.x86.sse2.comile.sd
  return _mm_comile_sd(A, B);
}
332
// _mm_comilt_sd should call the sse2.comilt.sd intrinsic returning i32.
int test_mm_comilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comilt_sd
  // CHECK: call i32 @llvm.x86.sse2.comilt.sd
  return _mm_comilt_sd(A, B);
}
338
// _mm_comineq_sd should call the sse2.comineq.sd intrinsic returning i32.
int test_mm_comineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comineq_sd
  // CHECK: call i32 @llvm.x86.sse2.comineq.sd
  return _mm_comineq_sd(A, B);
}
344
// _mm_cvtepi32_pd should call the sse2.cvtdq2pd intrinsic.
__m128d test_mm_cvtepi32_pd(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd
  return _mm_cvtepi32_pd(A);
}
350
// _mm_cvtepi32_ps should call the sse2.cvtdq2ps intrinsic.
__m128 test_mm_cvtepi32_ps(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_ps
  // CHECK: call <4 x float> @llvm.x86.sse2.cvtdq2ps
  return _mm_cvtepi32_ps(A);
}
356
// _mm_cvtpd_epi32 should call the sse2.cvtpd2dq intrinsic.
__m128i test_mm_cvtpd_epi32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq
  return _mm_cvtpd_epi32(A);
}
362
// _mm_cvtpd_ps should call the sse2.cvtpd2ps intrinsic.
__m128 test_mm_cvtpd_ps(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_ps
  // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps
  return _mm_cvtpd_ps(A);
}
368
// _mm_cvtps_epi32 should call the sse2.cvtps2dq intrinsic.
__m128i test_mm_cvtps_epi32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtps_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq
  return _mm_cvtps_epi32(A);
}
374
// _mm_cvtps_pd should call the sse2.cvtps2pd intrinsic.
__m128d test_mm_cvtps_pd(__m128 A) {
  // CHECK-LABEL: test_mm_cvtps_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd
  return _mm_cvtps_pd(A);
}
380
// _mm_cvtsd_f64 should extract element 0 of the <2 x double> vector.
double test_mm_cvtsd_f64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_f64
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  return _mm_cvtsd_f64(A);
}
386
// _mm_cvtsd_si32 should call the sse2.cvtsd2si intrinsic returning i32.
int test_mm_cvtsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si32
  // CHECK: call i32 @llvm.x86.sse2.cvtsd2si
  return _mm_cvtsd_si32(A);
}
392
// _mm_cvtsd_si64 should call the sse2.cvtsd2si64 intrinsic returning i64.
long long test_mm_cvtsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si64
  // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64
  return _mm_cvtsd_si64(A);
}
398
// _mm_cvtsd_ss should be emitted with a scalar fptrunc from double to float.
__m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
  // CHECK-LABEL: test_mm_cvtsd_ss
  // CHECK: fptrunc double %{{.*}} to float
  return _mm_cvtsd_ss(A, B);
}
404
// _mm_cvtsi128_si32 should extract element 0 of the <4 x i32> vector.
int test_mm_cvtsi128_si32(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si32
  // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
  return _mm_cvtsi128_si32(A);
}
410
// _mm_cvtsi128_si64 should extract element 0 of the <2 x i64> vector.
long long test_mm_cvtsi128_si64(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  return _mm_cvtsi128_si64(A);
}
416
// _mm_cvtsi32_sd should convert with sitofp i32 -> double and insert the
// result into element 0.
__m128d test_mm_cvtsi32_sd(__m128d A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_sd
  // CHECK: sitofp i32 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi32_sd(A, B);
}
423
// _mm_cvtsi32_si128 should insert the scalar into element 0 of an undef
// <4 x i32> vector.
__m128i test_mm_cvtsi32_si128(int A) {
  // CHECK-LABEL: test_mm_cvtsi32_si128
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  return _mm_cvtsi32_si128(A);
}
429
// _mm_cvtsi64_sd should convert with sitofp i64 -> double and insert the
// result into element 0.
__m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_sd
  // CHECK: sitofp i64 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi64_sd(A, B);
}
436
// _mm_cvtsi64_si128 should insert the scalar into element 0 of an undef
// <2 x i64> vector.
__m128i test_mm_cvtsi64_si128(long long A) {
  // CHECK-LABEL: test_mm_cvtsi64_si128
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  return _mm_cvtsi64_si128(A);
}
442
// _mm_cvtss_sd should extract float element 0, widen it with fpext, and
// insert the double into element 0.
__m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
  // CHECK-LABEL: test_mm_cvtss_sd
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fpext float %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtss_sd(A, B);
}
450
// _mm_cvttpd_epi32 should call the sse2.cvttpd2dq intrinsic.
__m128i test_mm_cvttpd_epi32(__m128d A) {
  // CHECK-LABEL: test_mm_cvttpd_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq
  return _mm_cvttpd_epi32(A);
}
456
// _mm_cvttps_epi32 should call the sse2.cvttps2dq intrinsic.
__m128i test_mm_cvttps_epi32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttps_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq
  return _mm_cvttps_epi32(A);
}
462
// _mm_cvttsd_si32 should extract element 0 and convert it with fptosi to i32.
int test_mm_cvttsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvttsd_si32
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fptosi double %{{.*}} to i32
  return _mm_cvttsd_si32(A);
}
469
// _mm_cvttsd_si64 should extract element 0 and convert it with fptosi to i64.
long long test_mm_cvttsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvttsd_si64
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fptosi double %{{.*}} to i64
  return _mm_cvttsd_si64(A);
}
476
// _mm_div_pd should be emitted as a generic IR fdiv on <2 x double>.
__m128d test_mm_div_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_div_pd
  // CHECK: fdiv <2 x double>
  return _mm_div_pd(A, B);
}
482
// _mm_div_sd should be emitted as a scalar IR fdiv on double.
__m128d test_mm_div_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_div_sd
  // CHECK: fdiv double
  return _mm_div_sd(A, B);
}
488
489 // Lowering to pextrw requires optimization.
test_mm_extract_epi16(__m128i A)490 int test_mm_extract_epi16(__m128i A) {
491 // CHECK-LABEL: test_mm_extract_epi16
492 // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
493 // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
494 return _mm_extract_epi16(A, 8);
495 }
496
// _mm_insert_epi16 should mask the index with 7 and feed the masked value to
// insertelement. The out-of-range index 8 is left as is.
__m128i test_mm_insert_epi16(__m128i A, short B) {
  // CHECK-LABEL: test_mm_insert_epi16
  // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
  // CHECK: insertelement <8 x i16> %{{.*}}, i32 [[x]]
  return _mm_insert_epi16(A, B, 8);
}
503
// _mm_lfence should call the sse2.lfence intrinsic.
// Declared with (void) so the definition provides a real prototype.
void test_mm_lfence(void) {
  // CHECK-LABEL: test_mm_lfence
  // CHECK: call void @llvm.x86.sse2.lfence()
  _mm_lfence();
}
509
// _mm_load_pd should be emitted as a 16-byte-aligned <2 x double> load.
__m128d test_mm_load_pd(double const* A) {
  // CHECK-LABEL: test_mm_load_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
  return _mm_load_pd(A);
}
515
// _mm_load_sd should be emitted as a scalar double load with alignment 1.
// The pattern ends with {{$}} (as in test_mm_loadh_pd) so that a line ending
// in "align 16" cannot satisfy it by substring match.
__m128d test_mm_load_sd(double const* A) {
  // CHECK-LABEL: test_mm_load_sd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  return _mm_load_sd(A);
}
521
// _mm_load_si128 should be emitted as a 16-byte-aligned <2 x i64> load.
__m128i test_mm_load_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_load_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
  return _mm_load_si128(A);
}
527
// _mm_load1_pd should load one double (align 8) and insert it into both
// elements of the result vector.
__m128d test_mm_load1_pd(double const* A) {
  // CHECK-LABEL: test_mm_load1_pd
  // CHECK: load double, double* %{{.*}}, align 8
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_load1_pd(A);
}
535
// _mm_loadh_pd should load the double with alignment exactly 1; {{$}} anchors
// the pattern at end of line.
__m128d test_mm_loadh_pd(__m128d x, void* y) {
  // CHECK-LABEL: test_mm_loadh_pd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  return _mm_loadh_pd(x, y);
}
541
// _mm_loadr_pd should be an aligned <2 x double> load followed by a shuffle
// that swaps the two elements.
__m128d test_mm_loadr_pd(double const* A) {
  // CHECK-LABEL: test_mm_loadr_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
  return _mm_loadr_pd(A);
}
548
// _mm_loadu_pd should be emitted as a <2 x double> load with alignment 1.
// The pattern ends with {{$}} so the aligned form ("align 16") cannot satisfy
// it by substring match, which would defeat the point of the unaligned test.
__m128d test_mm_loadu_pd(double const* A) {
  // CHECK-LABEL: test_mm_loadu_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
  return _mm_loadu_pd(A);
}
554
// _mm_loadu_si128 should be emitted as a <2 x i64> load with alignment 1.
// The pattern ends with {{$}} so the aligned form ("align 16") cannot satisfy
// it by substring match.
__m128i test_mm_loadu_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_loadu_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
  return _mm_loadu_si128(A);
}
560
// _mm_madd_epi16 should call sse2.pmadd.wd on two <8 x i16> operands.
__m128i test_mm_madd_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_madd_epi16
  // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_madd_epi16(A, B);
}
566
// _mm_maskmoveu_si128 should call sse2.maskmov.dqu with two <16 x i8>
// operands and an i8* destination.
void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
  // CHECK-LABEL: test_mm_maskmoveu_si128
  // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}})
  _mm_maskmoveu_si128(A, B, C);
}
572
// _mm_max_epi16 should call the sse2.pmaxs.w intrinsic.
__m128i test_mm_max_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_max_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_max_epi16(A, B);
}
578
// _mm_max_epu8 should call the sse2.pmaxu.b intrinsic.
__m128i test_mm_max_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_max_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_max_epu8(A, B);
}
584
// _mm_max_pd should call the sse2.max.pd intrinsic.
__m128d test_mm_max_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_pd(A, B);
}
590
// _mm_max_sd should call the sse2.max.sd intrinsic.
__m128d test_mm_max_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_sd(A, B);
}
596
// _mm_mfence should call the sse2.mfence intrinsic.
// Declared with (void) so the definition provides a real prototype.
void test_mm_mfence(void) {
  // CHECK-LABEL: test_mm_mfence
  // CHECK: call void @llvm.x86.sse2.mfence()
  _mm_mfence();
}
602
// _mm_min_epi16 should call the sse2.pmins.w intrinsic.
__m128i test_mm_min_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_min_epi16(A, B);
}
608
// _mm_min_epu8 should call the sse2.pminu.b intrinsic.
__m128i test_mm_min_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_min_epu8(A, B);
}
614
// _mm_min_pd should call the sse2.min.pd intrinsic.
__m128d test_mm_min_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_pd(A, B);
}
620
// _mm_min_sd should call the sse2.min.sd intrinsic.
__m128d test_mm_min_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_sd(A, B);
}
626
// _mm_movemask_epi8 should call the sse2.pmovmskb.128 intrinsic.
int test_mm_movemask_epi8(__m128i A) {
  // CHECK-LABEL: test_mm_movemask_epi8
  // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
  return _mm_movemask_epi8(A);
}
632
// _mm_movemask_pd should call the sse2.movmsk.pd intrinsic.
int test_mm_movemask_pd(__m128d A) {
  // CHECK-LABEL: test_mm_movemask_pd
  // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
  return _mm_movemask_pd(A);
}
638
// _mm_mul_epu32 should call the sse2.pmulu.dq intrinsic.
__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mul_epu32
  // CHECK: call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_mul_epu32(A, B);
}
644
// _mm_mul_pd should be emitted as a generic IR fmul on <2 x double>.
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_pd
  // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
  return _mm_mul_pd(A, B);
}
650
// _mm_mul_sd should be emitted as a scalar IR fmul on double.
__m128d test_mm_mul_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_sd
  // CHECK: fmul double %{{.*}}, %{{.*}}
  return _mm_mul_sd(A, B);
}
656
// _mm_mulhi_epi16 should call the sse2.pmulh.w intrinsic.
__m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epi16(A, B);
}
662
// _mm_mulhi_epu16 should call the sse2.pmulhu.w intrinsic.
__m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epu16(A, B);
}
668
// _mm_mullo_epi16 should be emitted as a generic IR mul on <8 x i16>.
__m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mullo_epi16
  // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
  return _mm_mullo_epi16(A, B);
}
674
// _mm_or_pd should be emitted as an integer IR or on <4 x i32>.
__m128d test_mm_or_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_or_pd
  // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
  return _mm_or_pd(A, B);
}
680
// _mm_or_si128 should be emitted as an IR or on <2 x i64>.
__m128i test_mm_or_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_or_si128
  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
  return _mm_or_si128(A, B);
}
686
// _mm_packs_epi16 should call the sse2.packsswb.128 intrinsic.
__m128i test_mm_packs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packs_epi16(A, B);
}
692
// _mm_packs_epi32 should call the sse2.packssdw.128 intrinsic.
__m128i test_mm_packs_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi32
  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_packs_epi32(A, B);
}
698
// _mm_packus_epi16 should call the sse2.packuswb.128 intrinsic.
__m128i test_mm_packus_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packus_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packus_epi16(A, B);
}
704
// _mm_pause should call the sse2.pause intrinsic.
// Written as a plain call statement, like the lfence/mfence tests: ISO C does
// not allow `return <expr>;` in a function returning void. Declared with
// (void) so the definition provides a real prototype.
void test_mm_pause(void) {
  // CHECK-LABEL: test_mm_pause
  // CHECK: call void @llvm.x86.sse2.pause()
  _mm_pause();
}
710
// _mm_sad_epu8 should call the sse2.psad.bw intrinsic.
__m128i test_mm_sad_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sad_epu8
  // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_sad_epu8(A, B);
}
716
// _mm_setzero_pd should store a zeroinitializer <2 x double>.
// Declared with (void) so the definition provides a real prototype.
__m128d test_mm_setzero_pd(void) {
  // CHECK-LABEL: test_mm_setzero_pd
  // CHECK: store <2 x double> zeroinitializer
  return _mm_setzero_pd();
}
722
// _mm_setzero_si128 should store a zeroinitializer <2 x i64>.
// Declared with (void) so the definition provides a real prototype.
__m128i test_mm_setzero_si128(void) {
  // CHECK-LABEL: test_mm_setzero_si128
  // CHECK: store <2 x i64> zeroinitializer
  return _mm_setzero_si128();
}
728
// _mm_shuffle_epi32 with immediate 0 should become a shufflevector with an
// all-zero mask.
__m128i test_mm_shuffle_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_shuffle_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
  return _mm_shuffle_epi32(A, 0);
}
734
// _mm_shuffle_pd with immediate 1 should become a shufflevector with mask
// <1, 2>.
__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_shuffle_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
  return _mm_shuffle_pd(A, B, 1);
}
740
// _mm_shufflehi_epi16 with immediate 0 should keep lanes 0..3 and fill lanes
// 4..7 with index 4.
__m128i test_mm_shufflehi_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflehi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  return _mm_shufflehi_epi16(A, 0);
}
746
// _mm_shufflelo_epi16 with immediate 0 should fill lanes 0..3 with index 0
// and keep lanes 4..7.
__m128i test_mm_shufflelo_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflelo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  return _mm_shufflelo_epi16(A, 0);
}
752
// _mm_sll_epi16 should call the sse2.psll.w intrinsic.
__m128i test_mm_sll_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w
  return _mm_sll_epi16(A, B);
}
758
// _mm_sll_epi32 should call the sse2.psll.d intrinsic.
__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d
  return _mm_sll_epi32(A, B);
}
764
// _mm_sll_epi64 should call the sse2.psll.q intrinsic.
__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q
  return _mm_sll_epi64(A, B);
}
770
// _mm_slli_epi16 should call the sse2.pslli.w intrinsic.
__m128i test_mm_slli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w
  return _mm_slli_epi16(A, 1);
}
776
// _mm_slli_epi32 should call the sse2.pslli.d intrinsic.
__m128i test_mm_slli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d
  return _mm_slli_epi32(A, 1);
}
782
// _mm_slli_epi64 should call the sse2.pslli.q intrinsic.
__m128i test_mm_slli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q
  return _mm_slli_epi64(A, 1);
}
788
// _mm_slli_si128 by 5 should become a byte shufflevector with indices 11..26
// (the same mask the _mm_bslli_si128 test matches).
__m128i test_mm_slli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_slli_si128(A, 5);
}
794
// _mm_sqrt_pd should call the sse2.sqrt.pd intrinsic.
__m128d test_mm_sqrt_pd(__m128d A) {
  // CHECK-LABEL: test_mm_sqrt_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %{{.*}})
  return _mm_sqrt_pd(A);
}
800
// _mm_sqrt_sd should call the sse2.sqrt.sd intrinsic with one vector operand.
__m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sqrt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %{{.*}})
  return _mm_sqrt_sd(A, B);
}
806
// _mm_sra_epi16 should call the sse2.psra.w intrinsic.
__m128i test_mm_sra_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w
  return _mm_sra_epi16(A, B);
}
812
// Verifies that _mm_sra_epi32 (shift count taken from a vector operand) is
// emitted as a call to the llvm.x86.sse2.psra.d intrinsic yielding <4 x i32>.
__m128i test_mm_sra_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d
  return _mm_sra_epi32(A, B);
}
818
// Verifies that _mm_srai_epi16 with an immediate count of 1 is emitted as a
// call to the llvm.x86.sse2.psrai.w intrinsic yielding <8 x i16>.
__m128i test_mm_srai_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w
  return _mm_srai_epi16(A, 1);
}
824
// Verifies that _mm_srai_epi32 with an immediate count of 1 is emitted as a
// call to the llvm.x86.sse2.psrai.d intrinsic yielding <4 x i32>.
__m128i test_mm_srai_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d
  return _mm_srai_epi32(A, 1);
}
830
// Verifies that _mm_srl_epi16 (shift count taken from a vector operand) is
// emitted as a call to the llvm.x86.sse2.psrl.w intrinsic yielding <8 x i16>.
__m128i test_mm_srl_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w
  return _mm_srl_epi16(A, B);
}
836
// Verifies that _mm_srl_epi32 (shift count taken from a vector operand) is
// emitted as a call to the llvm.x86.sse2.psrl.d intrinsic yielding <4 x i32>.
__m128i test_mm_srl_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d
  return _mm_srl_epi32(A, B);
}
842
// Verifies that _mm_srl_epi64 (shift count taken from a vector operand) is
// emitted as a call to the llvm.x86.sse2.psrl.q intrinsic yielding <2 x i64>.
__m128i test_mm_srl_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q
  return _mm_srl_epi64(A, B);
}
848
// Verifies that _mm_srli_epi16 with an immediate count of 1 is emitted as a
// call to the llvm.x86.sse2.psrli.w intrinsic yielding <8 x i16>.
__m128i test_mm_srli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w
  return _mm_srli_epi16(A, 1);
}
854
// Verifies that _mm_srli_epi32 with an immediate count of 1 is emitted as a
// call to the llvm.x86.sse2.psrli.d intrinsic yielding <4 x i32>.
__m128i test_mm_srli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d
  return _mm_srli_epi32(A, 1);
}
860
// Verifies that _mm_srli_epi64 with an immediate count of 1 is emitted as a
// call to the llvm.x86.sse2.psrli.q intrinsic yielding <2 x i64>.
__m128i test_mm_srli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q
  return _mm_srli_epi64(A, 1);
}
866
// Verifies that _mm_srli_si128 with a byte count of 5 is emitted as a
// shufflevector over two <16 x i8> operands selecting lanes 5 through 20,
// rather than as a target intrinsic call.
__m128i test_mm_srli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_srli_si128(A, 5);
}
872
// Verifies that _mm_store_pd is emitted as a plain <2 x double> store with
// 16-byte alignment.
// NOTE(review): a spill of parameter B in unoptimized IR may itself be a
// 16-byte-aligned <2 x double> store and satisfy this pattern first; confirm
// against the emitted IR.
void test_mm_store_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_store_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store_pd(A, B);
}
878
// Verifies that _mm_store_sd is emitted as a scalar double store with
// alignment 1. The trailing {{$}} pins the match to end of line so that a
// larger alignment beginning with the digit 1 (such as 16) is rejected.
void test_mm_store_sd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_store_sd
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_store_sd(A, B);
}
884
// Verifies that _mm_store_si128 is emitted as a plain <2 x i64> store with
// 16-byte alignment.
// NOTE(review): a spill of parameter B in unoptimized IR may itself be a
// 16-byte-aligned <2 x i64> store and satisfy this pattern first; confirm
// against the emitted IR.
void test_mm_store_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_store_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
  _mm_store_si128(A, B);
}
890
// Verifies that _mm_storeh_pd is emitted as a scalar double store with
// alignment 1. The pattern is anchored with {{$}}, matching the sibling
// test_mm_store_sd, so that an alignment such as 16 cannot satisfy it.
void test_mm_storeh_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_storeh_pd
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storeh_pd(A, B);
}
896
// Verifies that _mm_storel_pd is emitted as a scalar double store with
// alignment 1. The pattern is anchored with {{$}}, matching the sibling
// test_mm_store_sd, so that an alignment such as 16 cannot satisfy it.
void test_mm_storel_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_storel_pd
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storel_pd(A, B);
}
902
// Verifies that the unaligned store _mm_storeu_pd is emitted as a
// <2 x double> store with alignment 1.
// The pattern ends in {{$}} because an unanchored "align 1" is a prefix of
// "align 16": without the anchor, any 16-byte-aligned <2 x double> store in
// the function would satisfy the test and an aligned store emitted by mistake
// would go unnoticed.
void test_mm_storeu_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_storeu_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1{{$}}
  _mm_storeu_pd(A, B);
}
908
// Verifies that the unaligned store _mm_storeu_si128 is emitted as a
// <2 x i64> store with alignment 1.
// The pattern ends in {{$}} because an unanchored "align 1" is a prefix of
// "align 16": without the anchor, any 16-byte-aligned <2 x i64> store in the
// function would satisfy the test and an aligned store emitted by mistake
// would go unnoticed.
void test_mm_storeu_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
  _mm_storeu_si128(A, B);
}
914
// Verifies that _mm_stream_pd is emitted as a 16-byte-aligned <2 x double>
// store carrying !nontemporal metadata.
void test_mm_stream_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_stream_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
  _mm_stream_pd(A, B);
}
920
// Verifies that _mm_stream_si32 is emitted as an i32 store with alignment 1
// carrying !nontemporal metadata.
void test_mm_stream_si32(int *A, int B) {
  // CHECK-LABEL: test_mm_stream_si32
  // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal
  _mm_stream_si32(A, B);
}
926
// Verifies that _mm_stream_si64 is emitted as an i64 store with alignment 1
// carrying !nontemporal metadata.
void test_mm_stream_si64(long long *A, long long B) {
  // CHECK-LABEL: test_mm_stream_si64
  // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal
  _mm_stream_si64(A, B);
}
932
// Verifies that _mm_stream_si128 is emitted as a 16-byte-aligned <2 x i64>
// store carrying !nontemporal metadata.
void test_mm_stream_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_stream_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal
  _mm_stream_si128(A, B);
}
938
// Verifies that _mm_sub_epi8 is emitted as a generic IR sub on <16 x i8>
// rather than as a target intrinsic call.
__m128i test_mm_sub_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi8
  // CHECK: sub <16 x i8>
  return _mm_sub_epi8(A, B);
}
944
// Verifies that _mm_sub_epi16 is emitted as a generic IR sub on <8 x i16>
// rather than as a target intrinsic call.
__m128i test_mm_sub_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi16
  // CHECK: sub <8 x i16>
  return _mm_sub_epi16(A, B);
}
950
// Verifies that _mm_sub_epi32 is emitted as a generic IR sub on <4 x i32>
// rather than as a target intrinsic call.
__m128i test_mm_sub_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi32
  // CHECK: sub <4 x i32>
  return _mm_sub_epi32(A, B);
}
956
// Verifies that _mm_sub_epi64 is emitted as a generic IR sub on <2 x i64>
// rather than as a target intrinsic call.
__m128i test_mm_sub_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi64
  // CHECK: sub <2 x i64>
  return _mm_sub_epi64(A, B);
}
962
// Verifies that _mm_sub_pd is emitted as a generic IR fsub on <2 x double>
// rather than as a target intrinsic call.
__m128d test_mm_sub_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_pd
  // CHECK: fsub <2 x double>
  return _mm_sub_pd(A, B);
}
968
// Verifies that _mm_sub_sd is emitted as a scalar IR fsub on double rather
// than as a target intrinsic call.
__m128d test_mm_sub_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_sd
  // CHECK: fsub double
  return _mm_sub_sd(A, B);
}
974
// Verifies that _mm_subs_epi8 is emitted as a call to the
// llvm.x86.sse2.psubs.b intrinsic yielding <16 x i8>.
__m128i test_mm_subs_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b
  return _mm_subs_epi8(A, B);
}
980
// Verifies that _mm_subs_epi16 is emitted as a call to the
// llvm.x86.sse2.psubs.w intrinsic yielding <8 x i16>.
__m128i test_mm_subs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w
  return _mm_subs_epi16(A, B);
}
986
// Verifies that _mm_subs_epu8 is emitted as a call to the
// llvm.x86.sse2.psubus.b intrinsic yielding <16 x i8>.
__m128i test_mm_subs_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.psubus.b
  return _mm_subs_epu8(A, B);
}
992
// Verifies that _mm_subs_epu16 is emitted as a call to the
// llvm.x86.sse2.psubus.w intrinsic yielding <8 x i16>.
__m128i test_mm_subs_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psubus.w
  return _mm_subs_epu16(A, B);
}
998
// Verifies that _mm_ucomieq_sd is emitted as a call to the
// llvm.x86.sse2.ucomieq.sd intrinsic returning i32.
int test_mm_ucomieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomieq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd
  return _mm_ucomieq_sd(A, B);
}
1004
// Verifies that _mm_ucomige_sd is emitted as a call to the
// llvm.x86.sse2.ucomige.sd intrinsic returning i32.
int test_mm_ucomige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomige_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomige.sd
  return _mm_ucomige_sd(A, B);
}
1010
// Verifies that _mm_ucomigt_sd is emitted as a call to the
// llvm.x86.sse2.ucomigt.sd intrinsic returning i32.
int test_mm_ucomigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomigt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd
  return _mm_ucomigt_sd(A, B);
}
1016
// Verifies that _mm_ucomile_sd is emitted as a call to the
// llvm.x86.sse2.ucomile.sd intrinsic returning i32.
int test_mm_ucomile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomile_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomile.sd
  return _mm_ucomile_sd(A, B);
}
1022
// Verifies that _mm_ucomilt_sd is emitted as a call to the
// llvm.x86.sse2.ucomilt.sd intrinsic returning i32.
int test_mm_ucomilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomilt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd
  return _mm_ucomilt_sd(A, B);
}
1028
// Verifies that _mm_ucomineq_sd is emitted as a call to the
// llvm.x86.sse2.ucomineq.sd intrinsic returning i32.
int test_mm_ucomineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomineq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd
  return _mm_ucomineq_sd(A, B);
}
1034
// Verifies that _mm_unpackhi_epi8 is emitted as a shufflevector of two
// <16 x i8> operands whose mask alternates lanes 8..15 of the first operand
// with lanes 24..31 (the second operand's upper half).
__m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  return _mm_unpackhi_epi8(A, B);
}
1040
// Verifies that _mm_unpackhi_epi16 is emitted as a shufflevector of two
// <8 x i16> operands whose mask alternates lanes 4..7 of the first operand
// with lanes 12..15 (the second operand's upper half).
__m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  return _mm_unpackhi_epi16(A, B);
}
1046
// Verifies that _mm_unpackhi_epi32 is emitted as a shufflevector of two
// <4 x i32> operands with the mask <2, 6, 3, 7>.
__m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_epi32(A, B);
}
1052
// Verifies that _mm_unpackhi_epi64 is emitted as a shufflevector of two
// <2 x i64> operands with the mask <1, 3>.
__m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_epi64(A, B);
}
1058
// Verifies that _mm_unpackhi_pd is emitted as a shufflevector of two
// <2 x double> operands with the mask <1, 3>.
__m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpackhi_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_pd(A, B);
}
1064
// Verifies that _mm_unpacklo_epi8 is emitted as a shufflevector of two
// <16 x i8> operands whose mask alternates lanes 0..7 of the first operand
// with lanes 16..23 (the second operand's lower half).
__m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  return _mm_unpacklo_epi8(A, B);
}
1070
// Verifies that _mm_unpacklo_epi16 is emitted as a shufflevector of two
// <8 x i16> operands whose mask alternates lanes 0..3 of the first operand
// with lanes 8..11 (the second operand's lower half).
__m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  return _mm_unpacklo_epi16(A, B);
}
1076
// Verifies that _mm_unpacklo_epi32 is emitted as a shufflevector of two
// <4 x i32> operands with the mask <0, 4, 1, 5>.
__m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_epi32(A, B);
}
1082
// Verifies that _mm_unpacklo_epi64 is emitted as a shufflevector of two
// <2 x i64> operands with the mask <0, 2>.
__m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_epi64(A, B);
}
1088
// Verifies that _mm_unpacklo_pd is emitted as a shufflevector of two
// <2 x double> operands with the mask <0, 2>.
__m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpacklo_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_pd(A, B);
}
1094
// Verifies that _mm_xor_pd is emitted as a generic IR xor; the pattern
// expects the operation to be carried out on <4 x i32> rather than on a
// floating-point vector type.
__m128d test_mm_xor_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_xor_pd
  // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
  return _mm_xor_pd(A, B);
}
1100
// Verifies that _mm_xor_si128 is emitted as a generic IR xor on <2 x i64>
// rather than as a target intrinsic call.
__m128i test_mm_xor_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_xor_si128
  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
  return _mm_xor_si128(A, B);
}
1106