// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Werror | FileCheck %s
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s

// Don't include mm_malloc.h, it's system specific. Defining the macro below
// before the include is how that header is kept out (presumably it is the
// header's include guard).
#define __MM_MALLOC_H

#include <x86intrin.h>

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

// Expects _mm_add_epi8 to be emitted as a plain <16 x i8> add.
__m128i test_mm_add_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi8
  // CHECK: add <16 x i8>
  return _mm_add_epi8(A, B);
}

// Expects _mm_add_epi16 to be emitted as a plain <8 x i16> add.
__m128i test_mm_add_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi16
  // CHECK: add <8 x i16>
  return _mm_add_epi16(A, B);
}

// Expects _mm_add_epi32 to be emitted as a plain <4 x i32> add.
__m128i test_mm_add_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi32
  // CHECK: add <4 x i32>
  return _mm_add_epi32(A, B);
}

// Expects _mm_add_epi64 to be emitted as a plain <2 x i64> add.
__m128i test_mm_add_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi64
  // CHECK: add <2 x i64>
  return _mm_add_epi64(A, B);
}

// Expects _mm_add_pd to be emitted as a <2 x double> fadd.
__m128d test_mm_add_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_pd
  // CHECK: fadd <2 x double>
  return _mm_add_pd(A, B);
}

// Expects _mm_add_sd to extract lane 0 twice, add the scalars, and insert the
// sum back into lane 0.
__m128d test_mm_add_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fadd double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_add_sd(A, B);
}

// Expects _mm_adds_epi8 to call the llvm.x86.sse2.padds.b intrinsic.
__m128i test_mm_adds_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_adds_epi8(A, B);
}

// Expects _mm_adds_epi16 to call the llvm.x86.sse2.padds.w intrinsic.
__m128i test_mm_adds_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_adds_epi16(A, B);
}

// Expects _mm_adds_epu8 to call the llvm.x86.sse2.paddus.b intrinsic.
__m128i test_mm_adds_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_adds_epu8(A, B);
}

// Expects _mm_adds_epu16 to call the llvm.x86.sse2.paddus.w intrinsic.
__m128i test_mm_adds_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_adds_epu16(A, B);
}

// Expects _mm_and_pd to be emitted as an integer `and` on <4 x i32>.
__m128d test_mm_and_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_and_pd
  // CHECK: and <4 x i32>
  return _mm_and_pd(A, B);
}

// Expects _mm_and_si128 to be emitted as an `and` on <2 x i64>.
__m128i test_mm_and_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_and_si128
  // CHECK: and <2 x i64>
  return _mm_and_si128(A, B);
}

// Expects _mm_andnot_pd to be emitted as xor-with-all-ones followed by `and`,
// both on <4 x i32>.
__m128d test_mm_andnot_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_andnot_pd
  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
  // CHECK: and <4 x i32>
  return _mm_andnot_pd(A, B);
}

// Expects _mm_andnot_si128 to be emitted as xor-with-all-ones followed by
// `and`, both on <2 x i64>.
__m128i test_mm_andnot_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_andnot_si128
  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
  // CHECK: and <2 x i64>
  return _mm_andnot_si128(A, B);
}

// Expects _mm_avg_epu8 to call the llvm.x86.sse2.pavg.b intrinsic.
__m128i test_mm_avg_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_avg_epu8(A, B);
}

// Expects _mm_avg_epu16 to call the llvm.x86.sse2.pavg.w intrinsic.
__m128i test_mm_avg_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_avg_epu16(A, B);
}

// Expects a byte shift left by 5 to be emitted as a <16 x i8> shufflevector
// selecting indices 11..26 of the two concatenated operands.
__m128i test_mm_bslli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bslli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_bslli_si128(A, 5);
}

// Expects a byte shift right by 5 to be emitted as a <16 x i8> shufflevector
// selecting indices 5..20 of the two concatenated operands.
__m128i test_mm_bsrli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bsrli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_bsrli_si128(A, 5);
}

test_mm_castpd_ps(__m128d A)124 __m128 test_mm_castpd_ps(__m128d A) {
125 // CHECK-LABEL: test_mm_castpd_ps
126 // CHECK: bitcast <2 x double> %{{.*}} to <4 x float>
127 return _mm_castpd_ps(A);
128 }
129
test_mm_castpd_si128(__m128d A)130 __m128i test_mm_castpd_si128(__m128d A) {
131 // CHECK-LABEL: test_mm_castpd_si128
132 // CHECK: bitcast <2 x double> %{{.*}} to <2 x i64>
133 return _mm_castpd_si128(A);
134 }
135
test_mm_castps_pd(__m128 A)136 __m128d test_mm_castps_pd(__m128 A) {
137 // CHECK-LABEL: test_mm_castps_pd
138 // CHECK: bitcast <4 x float> %{{.*}} to <2 x double>
139 return _mm_castps_pd(A);
140 }
141
test_mm_castps_si128(__m128 A)142 __m128i test_mm_castps_si128(__m128 A) {
143 // CHECK-LABEL: test_mm_castps_si128
144 // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
145 return _mm_castps_si128(A);
146 }
147
test_mm_castsi128_pd(__m128i A)148 __m128d test_mm_castsi128_pd(__m128i A) {
149 // CHECK-LABEL: test_mm_castsi128_pd
150 // CHECK: bitcast <2 x i64> %{{.*}} to <2 x double>
151 return _mm_castsi128_pd(A);
152 }
153
test_mm_castsi128_ps(__m128i A)154 __m128 test_mm_castsi128_ps(__m128i A) {
155 // CHECK-LABEL: test_mm_castsi128_ps
156 // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
157 return _mm_castsi128_ps(A);
158 }
159
// Expects _mm_clflush to call llvm.x86.sse2.clflush with an i8* argument.
void test_mm_clflush(void* A) {
  // CHECK-LABEL: test_mm_clflush
  // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}})
  _mm_clflush(A);
}

// Expects _mm_cmpeq_epi8 to be emitted as `icmp eq` on <16 x i8>.
__m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi8
  // CHECK: icmp eq <16 x i8>
  return _mm_cmpeq_epi8(A, B);
}

// Expects _mm_cmpeq_epi16 to be emitted as `icmp eq` on <8 x i16>.
__m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi16
  // CHECK: icmp eq <8 x i16>
  return _mm_cmpeq_epi16(A, B);
}

// Expects _mm_cmpeq_epi32 to be emitted as `icmp eq` on <4 x i32>.
__m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi32
  // CHECK: icmp eq <4 x i32>
  return _mm_cmpeq_epi32(A, B);
}

// Expects _mm_cmpeq_pd to emit `fcmp oeq`, immediately followed by sext to
// <2 x i64>, a bitcast back to <2 x double>, and the return.
__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_pd
  // CHECK:         [[CMP:%.*]] = fcmp oeq <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpeq_pd(A, B);
}

// Expects _mm_cmpeq_sd to call llvm.x86.sse2.cmp.sd with immediate 0.
__m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_sd(A, B);
}

// Expects _mm_cmpge_pd to emit `fcmp ole` (operand order is not pinned by the
// pattern), immediately followed by sext, bitcast, and the return.
__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_pd
  // CHECK:         [[CMP:%.*]] = fcmp ole <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpge_pd(A, B);
}

// Expects _mm_cmpge_sd to call llvm.x86.sse2.cmp.sd with immediate 2 and then
// rebuild the result vector with extract/insert pairs for lanes 0 and 1.
__m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpge_sd(A, B);
}

// Expects _mm_cmpgt_epi8 to be emitted as `icmp sgt` on <16 x i8>.
__m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmpgt_epi8(A, B);
}

// Expects _mm_cmpgt_epi16 to be emitted as `icmp sgt` on <8 x i16>.
__m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmpgt_epi16(A, B);
}

// Expects _mm_cmpgt_epi32 to be emitted as `icmp sgt` on <4 x i32>.
__m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmpgt_epi32(A, B);
}

// Expects _mm_cmpgt_pd to emit `fcmp olt` (operand order is not pinned by the
// pattern), immediately followed by sext, bitcast, and the return.
__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_pd
  // CHECK:         [[CMP:%.*]] = fcmp olt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpgt_pd(A, B);
}

// Expects _mm_cmpgt_sd to call llvm.x86.sse2.cmp.sd with immediate 1 and then
// rebuild the result vector with extract/insert pairs for lanes 0 and 1.
__m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpgt_sd(A, B);
}

// Expects _mm_cmple_pd to emit `fcmp ole`, immediately followed by sext,
// bitcast, and the return.
__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_pd
  // CHECK:         [[CMP:%.*]] = fcmp ole <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmple_pd(A, B);
}

// Expects _mm_cmple_sd to call llvm.x86.sse2.cmp.sd with immediate 2.
__m128d test_mm_cmple_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_sd(A, B);
}

// Expects _mm_cmplt_epi8 to be emitted as `icmp sgt` on <16 x i8> (operand
// order is not pinned by the pattern).
__m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmplt_epi8(A, B);
}

// Expects _mm_cmplt_epi16 to be emitted as `icmp sgt` on <8 x i16> (operand
// order is not pinned by the pattern).
__m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmplt_epi16(A, B);
}

// Expects _mm_cmplt_epi32 to be emitted as `icmp sgt` on <4 x i32> (operand
// order is not pinned by the pattern).
__m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmplt_epi32(A, B);
}

// Expects _mm_cmplt_pd to emit `fcmp olt`, immediately followed by sext,
// bitcast, and the return.
__m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_pd
  // CHECK:         [[CMP:%.*]] = fcmp olt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmplt_pd(A, B);
}

// Expects _mm_cmplt_sd to call llvm.x86.sse2.cmp.sd with immediate 1.
__m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_sd(A, B);
}

// Expects _mm_cmpneq_pd to emit `fcmp une`, immediately followed by sext,
// bitcast, and the return.
__m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_pd
  // CHECK:         [[CMP:%.*]] = fcmp une <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpneq_pd(A, B);
}

// Expects _mm_cmpneq_sd to call llvm.x86.sse2.cmp.sd with immediate 4.
__m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_sd(A, B);
}

// Expects _mm_cmpnge_pd to emit `fcmp ugt` (operand order is not pinned by
// the pattern), immediately followed by sext, bitcast, and the return.
__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_pd
  // CHECK:         [[CMP:%.*]] = fcmp ugt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnge_pd(A, B);
}

// Expects _mm_cmpnge_sd to call llvm.x86.sse2.cmp.sd with immediate 6 and
// then rebuild the result vector with extract/insert pairs for lanes 0 and 1.
__m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpnge_sd(A, B);
}

// Expects _mm_cmpngt_pd to emit `fcmp uge` (operand order is not pinned by
// the pattern), immediately followed by sext, bitcast, and the return.
__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_pd
  // CHECK:         [[CMP:%.*]] = fcmp uge <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpngt_pd(A, B);
}

// Expects _mm_cmpngt_sd to call llvm.x86.sse2.cmp.sd with immediate 5 and
// then rebuild the result vector with extract/insert pairs for lanes 0 and 1.
__m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpngt_sd(A, B);
}

// Expects _mm_cmpnle_pd to emit `fcmp ugt`, immediately followed by sext,
// bitcast, and the return.
__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_pd
  // CHECK:         [[CMP:%.*]] = fcmp ugt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnle_pd(A, B);
}

// Expects _mm_cmpnle_sd to call llvm.x86.sse2.cmp.sd with immediate 6.
__m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_sd(A, B);
}

// Expects _mm_cmpnlt_pd to emit `fcmp uge`, immediately followed by sext,
// bitcast, and the return.
__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_pd
  // CHECK:         [[CMP:%.*]] = fcmp uge <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnlt_pd(A, B);
}

// Expects _mm_cmpnlt_sd to call llvm.x86.sse2.cmp.sd with immediate 5.
__m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_sd(A, B);
}

// Expects _mm_cmpord_pd to emit `fcmp ord`, immediately followed by sext,
// bitcast, and the return.
__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_pd
  // CHECK:         [[CMP:%.*]] = fcmp ord <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpord_pd(A, B);
}

// Expects _mm_cmpord_sd to call llvm.x86.sse2.cmp.sd with immediate 7.
__m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_sd(A, B);
}

// Expects _mm_cmpunord_pd to emit `fcmp uno`, immediately followed by sext,
// bitcast, and the return.
__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_pd
  // CHECK:         [[CMP:%.*]] = fcmp uno <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpunord_pd(A, B);
}

// Expects _mm_cmpunord_sd to call llvm.x86.sse2.cmp.sd with immediate 3.
__m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_sd(A, B);
}

// Expects _mm_comieq_sd to call the llvm.x86.sse2.comieq.sd intrinsic.
int test_mm_comieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comieq_sd
  // CHECK: call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comieq_sd(A, B);
}

// Expects _mm_comige_sd to call the llvm.x86.sse2.comige.sd intrinsic.
int test_mm_comige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comige_sd
  // CHECK: call i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comige_sd(A, B);
}

// Expects _mm_comigt_sd to call the llvm.x86.sse2.comigt.sd intrinsic.
int test_mm_comigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comigt_sd
  // CHECK: call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comigt_sd(A, B);
}

// Expects _mm_comile_sd to call the llvm.x86.sse2.comile.sd intrinsic.
int test_mm_comile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comile_sd
  // CHECK: call i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comile_sd(A, B);
}

// Expects _mm_comilt_sd to call the llvm.x86.sse2.comilt.sd intrinsic.
int test_mm_comilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comilt_sd
  // CHECK: call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comilt_sd(A, B);
}

// Expects _mm_comineq_sd to call the llvm.x86.sse2.comineq.sd intrinsic.
int test_mm_comineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comineq_sd
  // CHECK: call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comineq_sd(A, B);
}

test_mm_cvtepi32_pd(__m128i A)452 __m128d test_mm_cvtepi32_pd(__m128i A) {
453 // CHECK-LABEL: test_mm_cvtepi32_pd
454 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
455 // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
456 return _mm_cvtepi32_pd(A);
457 }
458
test_mm_cvtepi32_ps(__m128i A)459 __m128 test_mm_cvtepi32_ps(__m128i A) {
460 // CHECK-LABEL: test_mm_cvtepi32_ps
461 // CHECK: call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %{{.*}})
462 return _mm_cvtepi32_ps(A);
463 }
464
test_mm_cvtpd_epi32(__m128d A)465 __m128i test_mm_cvtpd_epi32(__m128d A) {
466 // CHECK-LABEL: test_mm_cvtpd_epi32
467 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %{{.*}})
468 return _mm_cvtpd_epi32(A);
469 }
470
test_mm_cvtpd_ps(__m128d A)471 __m128 test_mm_cvtpd_ps(__m128d A) {
472 // CHECK-LABEL: test_mm_cvtpd_ps
473 // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %{{.*}})
474 return _mm_cvtpd_ps(A);
475 }
476
test_mm_cvtps_epi32(__m128 A)477 __m128i test_mm_cvtps_epi32(__m128 A) {
478 // CHECK-LABEL: test_mm_cvtps_epi32
479 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
480 return _mm_cvtps_epi32(A);
481 }
482
test_mm_cvtps_pd(__m128 A)483 __m128d test_mm_cvtps_pd(__m128 A) {
484 // CHECK-LABEL: test_mm_cvtps_pd
485 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
486 // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
487 return _mm_cvtps_pd(A);
488 }
489
// Expects _mm_cvtsd_f64 to be an extractelement of lane 0.
double test_mm_cvtsd_f64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_f64
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  return _mm_cvtsd_f64(A);
}

// Expects _mm_cvtsd_si32 to call the llvm.x86.sse2.cvtsd2si intrinsic.
int test_mm_cvtsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si32
  // CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}})
  return _mm_cvtsd_si32(A);
}

// Expects _mm_cvtsd_si64 to call the llvm.x86.sse2.cvtsd2si64 intrinsic.
long long test_mm_cvtsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si64
  // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
  return _mm_cvtsd_si64(A);
}

// Expects _mm_cvtsd_ss to contain a scalar double-to-float fptrunc.
__m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
  // CHECK-LABEL: test_mm_cvtsd_ss
  // CHECK: fptrunc double %{{.*}} to float
  return _mm_cvtsd_ss(A, B);
}

// Expects _mm_cvtsi128_si32 to be an extractelement of lane 0 of <4 x i32>.
int test_mm_cvtsi128_si32(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si32
  // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
  return _mm_cvtsi128_si32(A);
}

// Expects _mm_cvtsi128_si64 to be an extractelement of lane 0 of <2 x i64>.
long long test_mm_cvtsi128_si64(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  return _mm_cvtsi128_si64(A);
}

// Expects _mm_cvtsi32_sd to convert the i32 with sitofp and insert the result
// into lane 0.
__m128d test_mm_cvtsi32_sd(__m128d A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_sd
  // CHECK: sitofp i32 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi32_sd(A, B);
}

test_mm_cvtsi32_si128(int A)533 __m128i test_mm_cvtsi32_si128(int A) {
534 // CHECK-LABEL: test_mm_cvtsi32_si128
535 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
536 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
537 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
538 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
539 return _mm_cvtsi32_si128(A);
540 }
541
// Expects _mm_cvtsi64_sd to convert the i64 with sitofp and insert the result
// into lane 0.
__m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_sd
  // CHECK: sitofp i64 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi64_sd(A, B);
}

test_mm_cvtsi64_si128(long long A)549 __m128i test_mm_cvtsi64_si128(long long A) {
550 // CHECK-LABEL: test_mm_cvtsi64_si128
551 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
552 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
553 return _mm_cvtsi64_si128(A);
554 }
555
// Expects _mm_cvtss_sd to extract float lane 0, widen it with fpext, and
// insert the double into lane 0.
__m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
  // CHECK-LABEL: test_mm_cvtss_sd
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fpext float %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtss_sd(A, B);
}

test_mm_cvttpd_epi32(__m128d A)564 __m128i test_mm_cvttpd_epi32(__m128d A) {
565 // CHECK-LABEL: test_mm_cvttpd_epi32
566 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}})
567 return _mm_cvttpd_epi32(A);
568 }
569
test_mm_cvttps_epi32(__m128 A)570 __m128i test_mm_cvttps_epi32(__m128 A) {
571 // CHECK-LABEL: test_mm_cvttps_epi32
572 // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32>
573 return _mm_cvttps_epi32(A);
574 }
575
// Expects _mm_cvttsd_si32 to extract lane 0 and convert it with fptosi to i32.
int test_mm_cvttsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvttsd_si32
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fptosi double %{{.*}} to i32
  return _mm_cvttsd_si32(A);
}

// Expects _mm_cvttsd_si64 to extract lane 0 and convert it with fptosi to i64.
long long test_mm_cvttsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvttsd_si64
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fptosi double %{{.*}} to i64
  return _mm_cvttsd_si64(A);
}

// Expects _mm_div_pd to be emitted as a <2 x double> fdiv.
__m128d test_mm_div_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_div_pd
  // CHECK: fdiv <2 x double>
  return _mm_div_pd(A, B);
}

// Expects _mm_div_sd to extract lane 0 twice, divide the scalars, and insert
// the quotient back into lane 0.
__m128d test_mm_div_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_div_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fdiv double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_div_sd(A, B);
}

605 // Lowering to pextrw requires optimization.
test_mm_extract_epi16(__m128i A)606 int test_mm_extract_epi16(__m128i A) {
607 // CHECK-LABEL: test_mm_extract_epi16
608 // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
609 // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
610 // CHECK: zext i16 %{{.*}} to i32
611 return _mm_extract_epi16(A, 9);
612 }
613
// The call passes index 8; the patterns expect the index to be masked with 7
// before the <8 x i16> insertelement.
__m128i test_mm_insert_epi16(__m128i A, int B) {
  // CHECK-LABEL: test_mm_insert_epi16
  // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
  // CHECK: insertelement <8 x i16> %{{.*}}, i32 [[x]]
  return _mm_insert_epi16(A, B, 8);
}

// Expects _mm_lfence to call the llvm.x86.sse2.lfence intrinsic.
// Declared with (void) so the definition carries a real prototype.
void test_mm_lfence(void) {
  // CHECK-LABEL: test_mm_lfence
  // CHECK: call void @llvm.x86.sse2.lfence()
  _mm_lfence();
}

test_mm_load_pd(double const * A)627 __m128d test_mm_load_pd(double const* A) {
628 // CHECK-LABEL: test_mm_load_pd
629 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
630 return _mm_load_pd(A);
631 }
632
test_mm_load_pd1(double const * A)633 __m128d test_mm_load_pd1(double const* A) {
634 // CHECK-LABEL: test_mm_load_pd1
635 // CHECK: load double, double* %{{.*}}, align 8
636 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
637 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
638 return _mm_load_pd1(A);
639 }
640
test_mm_load_sd(double const * A)641 __m128d test_mm_load_sd(double const* A) {
642 // CHECK-LABEL: test_mm_load_sd
643 // CHECK: load double, double* %{{.*}}, align 1{{$}}
644 return _mm_load_sd(A);
645 }
646
// Expects _mm_load_si128 to be a <2 x i64> load with 16-byte alignment.
__m128i test_mm_load_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_load_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
  return _mm_load_si128(A);
}

test_mm_load1_pd(double const * A)653 __m128d test_mm_load1_pd(double const* A) {
654 // CHECK-LABEL: test_mm_load1_pd
655 // CHECK: load double, double* %{{.*}}, align 8
656 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
657 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
658 return _mm_load1_pd(A);
659 }
660
// Expects _mm_loadh_pd to load a scalar double with alignment exactly 1 and
// insert it into lane 1.
__m128d test_mm_loadh_pd(__m128d x, void* y) {
  // CHECK-LABEL: test_mm_loadh_pd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_loadh_pd(x, y);
}

// Expects _mm_loadl_epi64 to load an i64 with alignment exactly 1, place it in
// lane 0, and insert constant zero into lane 1.
// The function-name pattern uses the LABEL form, like every other test in this
// file, so later patterns cannot match across the function boundary.
__m128i test_mm_loadl_epi64(__m128i* y) {
  // CHECK-LABEL: test_mm_loadl_epi64
  // CHECK: load i64, i64* {{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x i64> undef, i64 {{.*}}, i32 0
  // CHECK: insertelement <2 x i64> {{.*}}, i64 0, i32 1
  return _mm_loadl_epi64(y);
}

// Expects _mm_loadl_pd to load a scalar double with alignment exactly 1 into
// lane 0, then extract a lane-1 value and insert it into lane 1.
__m128d test_mm_loadl_pd(__m128d x, void* y) {
  // CHECK-LABEL: test_mm_loadl_pd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_loadl_pd(x, y);
}

test_mm_loadr_pd(double const * A)685 __m128d test_mm_loadr_pd(double const* A) {
686 // CHECK-LABEL: test_mm_loadr_pd
687 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
688 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
689 return _mm_loadr_pd(A);
690 }
691
test_mm_loadu_pd(double const * A)692 __m128d test_mm_loadu_pd(double const* A) {
693 // CHECK-LABEL: test_mm_loadu_pd
694 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
695 return _mm_loadu_pd(A);
696 }
697
// Expects _mm_loadu_si128 to be a <2 x i64> load with alignment exactly 1.
__m128i test_mm_loadu_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_loadu_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
  return _mm_loadu_si128(A);
}

test_mm_loadu_si64(void const * A)704 __m128i test_mm_loadu_si64(void const* A) {
705 // CHECK-LABEL: test_mm_loadu_si64
706 // CHECK: load i64, i64* %{{.*}}, align 1{{$}}
707 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
708 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
709 return _mm_loadu_si64(A);
710 }
711
// Expects _mm_madd_epi16 to call the llvm.x86.sse2.pmadd.wd intrinsic.
__m128i test_mm_madd_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_madd_epi16
  // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_madd_epi16(A, B);
}

// Expects _mm_maskmoveu_si128 to call the llvm.x86.sse2.maskmov.dqu intrinsic.
void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
  // CHECK-LABEL: test_mm_maskmoveu_si128
  // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}})
  _mm_maskmoveu_si128(A, B, C);
}

// Expects _mm_max_epi16 to be an `icmp sgt` on <8 x i16> immediately followed
// by a select between the same two operands.
__m128i test_mm_max_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_max_epi16
  // CHECK:       [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
  return _mm_max_epi16(A, B);
}

// Expects _mm_max_epu8 to be an `icmp ugt` on <16 x i8> immediately followed
// by a select between the same two operands.
__m128i test_mm_max_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_max_epu8
  // CHECK:       [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
  return _mm_max_epu8(A, B);
}

// Expects _mm_max_pd to call the llvm.x86.sse2.max.pd intrinsic.
__m128d test_mm_max_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_pd(A, B);
}

// Expects _mm_max_sd to call the llvm.x86.sse2.max.sd intrinsic.
__m128d test_mm_max_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_sd(A, B);
}

// Expects _mm_mfence to call the llvm.x86.sse2.mfence intrinsic.
// Declared with (void) so the definition carries a real prototype.
void test_mm_mfence(void) {
  // CHECK-LABEL: test_mm_mfence
  // CHECK: call void @llvm.x86.sse2.mfence()
  _mm_mfence();
}

// Expects _mm_min_epi16 to be an `icmp slt` on <8 x i16> immediately followed
// by a select between the same two operands.
__m128i test_mm_min_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epi16
  // CHECK:       [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
  return _mm_min_epi16(A, B);
}

// Expects _mm_min_epu8 to be an `icmp ult` on <16 x i8> immediately followed
// by a select between the same two operands.
__m128i test_mm_min_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epu8
  // CHECK:       [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
  return _mm_min_epu8(A, B);
}

// Expects _mm_min_pd to call the llvm.x86.sse2.min.pd intrinsic.
__m128d test_mm_min_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_pd(A, B);
}

// Expects _mm_min_sd to be emitted as a call to the llvm.x86.sse2.min.sd
// intrinsic on two <2 x double> operands.
__m128d test_mm_min_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_sd(A, B);
}
781
// Expects _mm_move_epi64 to be emitted as a <2 x i64> shufflevector with
// mask <0, 2>.
__m128i test_mm_move_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_move_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_move_epi64(A);
}
787
// Expects _mm_move_sd to be emitted as two extract/insert pairs: lane 0 is
// extracted and inserted into an undef vector, then lane 1 is extracted and
// inserted into that result.
__m128d test_mm_move_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_move_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_move_sd(A, B);
}
796
// Expects _mm_movemask_epi8 to be emitted as a call to the
// llvm.x86.sse2.pmovmskb.128 intrinsic returning i32.
int test_mm_movemask_epi8(__m128i A) {
  // CHECK-LABEL: test_mm_movemask_epi8
  // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
  return _mm_movemask_epi8(A);
}
802
// Expects _mm_movemask_pd to be emitted as a call to the
// llvm.x86.sse2.movmsk.pd intrinsic returning i32.
int test_mm_movemask_pd(__m128d A) {
  // CHECK-LABEL: test_mm_movemask_pd
  // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
  return _mm_movemask_pd(A);
}
808
// Expects _mm_mul_epu32 to be emitted as a call to the
// llvm.x86.sse2.pmulu.dq intrinsic (<4 x i32> operands, <2 x i64> result).
__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mul_epu32
  // CHECK: call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_mul_epu32(A, B);
}
814
// Expects _mm_mul_pd to be emitted as a generic fmul on <2 x double>.
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_pd
  // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
  return _mm_mul_pd(A, B);
}
820
// Expects _mm_mul_sd to be emitted as two lane-0 extracts, a scalar fmul,
// and an insert of the product back into lane 0.
__m128d test_mm_mul_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fmul double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_mul_sd(A, B);
}
829
// Expects _mm_mulhi_epi16 to be emitted as a call to the
// llvm.x86.sse2.pmulh.w intrinsic on <8 x i16> operands.
__m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epi16(A, B);
}
835
// Expects _mm_mulhi_epu16 to be emitted as a call to the
// llvm.x86.sse2.pmulhu.w intrinsic on <8 x i16> operands.
__m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epu16(A, B);
}
841
// Expects _mm_mullo_epi16 to be emitted as a generic mul on <8 x i16>.
__m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mullo_epi16
  // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
  return _mm_mullo_epi16(A, B);
}
847
// Expects _mm_or_pd to be emitted as a generic bitwise or performed on
// <4 x i32> values.
__m128d test_mm_or_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_or_pd
  // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
  return _mm_or_pd(A, B);
}
853
// Expects _mm_or_si128 to be emitted as a generic bitwise or on <2 x i64>.
__m128i test_mm_or_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_or_si128
  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
  return _mm_or_si128(A, B);
}
859
// Expects _mm_packs_epi16 to be emitted as a call to the
// llvm.x86.sse2.packsswb.128 intrinsic (<8 x i16> operands, <16 x i8> result).
__m128i test_mm_packs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packs_epi16(A, B);
}
865
// Expects _mm_packs_epi32 to be emitted as a call to the
// llvm.x86.sse2.packssdw.128 intrinsic (<4 x i32> operands, <8 x i16> result).
__m128i test_mm_packs_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi32
  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_packs_epi32(A, B);
}
871
// Expects _mm_packus_epi16 to be emitted as a call to the
// llvm.x86.sse2.packuswb.128 intrinsic (<8 x i16> operands, <16 x i8> result).
__m128i test_mm_packus_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packus_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packus_epi16(A, B);
}
877
// Expects _mm_pause to be emitted as a call to the llvm.x86.sse2.pause
// intrinsic.
void test_mm_pause(void) {
  // CHECK-LABEL: test_mm_pause
  // CHECK: call void @llvm.x86.sse2.pause()
  // Plain call: ISO C does not allow `return expr;` in a void function.
  _mm_pause();
}
883
// Expects _mm_sad_epu8 to be emitted as a call to the llvm.x86.sse2.psad.bw
// intrinsic (<16 x i8> operands, <2 x i64> result).
__m128i test_mm_sad_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sad_epu8
  // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_sad_epu8(A, B);
}
889
test_mm_set_epi8(char A,char B,char C,char D,char E,char F,char G,char H,char I,char J,char K,char L,char M,char N,char O,char P)890 __m128i test_mm_set_epi8(char A, char B, char C, char D,
891 char E, char F, char G, char H,
892 char I, char J, char K, char L,
893 char M, char N, char O, char P) {
894 // CHECK-LABEL: test_mm_set_epi8
895 // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
896 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
897 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
898 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
899 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
900 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
901 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
902 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
903 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
904 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
905 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
906 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
907 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
908 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
909 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
910 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
911 return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
912 }
913
test_mm_set_epi16(short A,short B,short C,short D,short E,short F,short G,short H)914 __m128i test_mm_set_epi16(short A, short B, short C, short D,
915 short E, short F, short G, short H) {
916 // CHECK-LABEL: test_mm_set_epi16
917 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
918 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
919 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
920 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
921 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
922 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
923 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
924 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
925 return _mm_set_epi16(A, B, C, D, E, F, G, H);
926 }
927
test_mm_set_epi32(int A,int B,int C,int D)928 __m128i test_mm_set_epi32(int A, int B, int C, int D) {
929 // CHECK-LABEL: test_mm_set_epi32
930 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
931 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
932 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
933 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
934 return _mm_set_epi32(A, B, C, D);
935 }
936
test_mm_set_epi64(__m64 A,__m64 B)937 __m128i test_mm_set_epi64(__m64 A, __m64 B) {
938 // CHECK-LABEL: test_mm_set_epi64
939 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
940 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
941 return _mm_set_epi64(A, B);
942 }
943
test_mm_set_epi64x(long long A,long long B)944 __m128i test_mm_set_epi64x(long long A, long long B) {
945 // CHECK-LABEL: test_mm_set_epi64x
946 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
947 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
948 return _mm_set_epi64x(A, B);
949 }
950
test_mm_set_pd(double A,double B)951 __m128d test_mm_set_pd(double A, double B) {
952 // CHECK-LABEL: test_mm_set_pd
953 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
954 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
955 return _mm_set_pd(A, B);
956 }
957
test_mm_set_sd(double A)958 __m128d test_mm_set_sd(double A) {
959 // CHECK-LABEL: test_mm_set_sd
960 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
961 // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
962 return _mm_set_sd(A);
963 }
964
test_mm_set1_epi8(char A)965 __m128i test_mm_set1_epi8(char A) {
966 // CHECK-LABEL: test_mm_set1_epi8
967 // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
968 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
969 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
970 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
971 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
972 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
973 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
974 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
975 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
976 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
977 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
978 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
979 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
980 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
981 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
982 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
983 return _mm_set1_epi8(A);
984 }
985
test_mm_set1_epi16(short A)986 __m128i test_mm_set1_epi16(short A) {
987 // CHECK-LABEL: test_mm_set1_epi16
988 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
989 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
990 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
991 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
992 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
993 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
994 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
995 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
996 return _mm_set1_epi16(A);
997 }
998
test_mm_set1_epi32(int A)999 __m128i test_mm_set1_epi32(int A) {
1000 // CHECK-LABEL: test_mm_set1_epi32
1001 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
1002 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
1003 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
1004 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
1005 return _mm_set1_epi32(A);
1006 }
1007
test_mm_set1_epi64(__m64 A)1008 __m128i test_mm_set1_epi64(__m64 A) {
1009 // CHECK-LABEL: test_mm_set1_epi64
1010 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
1011 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1012 return _mm_set1_epi64(A);
1013 }
1014
test_mm_set1_epi64x(long long A)1015 __m128i test_mm_set1_epi64x(long long A) {
1016 // CHECK-LABEL: test_mm_set1_epi64x
1017 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
1018 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1019 return _mm_set1_epi64x(A);
1020 }
1021
test_mm_set1_pd(double A)1022 __m128d test_mm_set1_pd(double A) {
1023 // CHECK-LABEL: test_mm_set1_pd
1024 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
1025 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
1026 return _mm_set1_pd(A);
1027 }
1028
test_mm_setr_epi8(char A,char B,char C,char D,char E,char F,char G,char H,char I,char J,char K,char L,char M,char N,char O,char P)1029 __m128i test_mm_setr_epi8(char A, char B, char C, char D,
1030 char E, char F, char G, char H,
1031 char I, char J, char K, char L,
1032 char M, char N, char O, char P) {
1033 // CHECK-LABEL: test_mm_setr_epi8
1034 // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
1035 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
1036 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
1037 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
1038 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
1039 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
1040 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
1041 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
1042 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
1043 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
1044 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
1045 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
1046 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
1047 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
1048 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
1049 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
1050 return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
1051 }
1052
test_mm_setr_epi16(short A,short B,short C,short D,short E,short F,short G,short H)1053 __m128i test_mm_setr_epi16(short A, short B, short C, short D,
1054 short E, short F, short G, short H) {
1055 // CHECK-LABEL: test_mm_setr_epi16
1056 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
1057 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
1058 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
1059 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
1060 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
1061 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
1062 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
1063 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
1064 return _mm_setr_epi16(A, B, C, D, E, F, G, H);
1065 }
1066
test_mm_setr_epi32(int A,int B,int C,int D)1067 __m128i test_mm_setr_epi32(int A, int B, int C, int D) {
1068 // CHECK-LABEL: test_mm_setr_epi32
1069 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
1070 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
1071 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
1072 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
1073 return _mm_setr_epi32(A, B, C, D);
1074 }
1075
test_mm_setr_epi64(__m64 A,__m64 B)1076 __m128i test_mm_setr_epi64(__m64 A, __m64 B) {
1077 // CHECK-LABEL: test_mm_setr_epi64
1078 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
1079 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1080 return _mm_setr_epi64(A, B);
1081 }
1082
test_mm_setr_pd(double A,double B)1083 __m128d test_mm_setr_pd(double A, double B) {
1084 // CHECK-LABEL: test_mm_setr_pd
1085 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
1086 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
1087 return _mm_setr_pd(A, B);
1088 }
1089
test_mm_setzero_pd()1090 __m128d test_mm_setzero_pd() {
1091 // CHECK-LABEL: test_mm_setzero_pd
1092 // CHECK: store <2 x double> zeroinitializer
1093 return _mm_setzero_pd();
1094 }
1095
test_mm_setzero_si128()1096 __m128i test_mm_setzero_si128() {
1097 // CHECK-LABEL: test_mm_setzero_si128
1098 // CHECK: store <2 x i64> zeroinitializer
1099 return _mm_setzero_si128();
1100 }
1101
// Expects _mm_shuffle_epi32 with immediate 0 to be emitted as a <4 x i32>
// shufflevector with an all-zero mask.
__m128i test_mm_shuffle_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_shuffle_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
  return _mm_shuffle_epi32(A, 0);
}
1107
// Expects _mm_shuffle_pd with immediate 1 to be emitted as a <2 x double>
// shufflevector with mask <1, 2>.
__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_shuffle_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
  return _mm_shuffle_pd(A, B, 1);
}
1113
// Expects _mm_shufflehi_epi16 with immediate 0 to be emitted as an
// <8 x i16> shufflevector with mask <0, 1, 2, 3, 4, 4, 4, 4>.
__m128i test_mm_shufflehi_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflehi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  return _mm_shufflehi_epi16(A, 0);
}
1119
// Expects _mm_shufflelo_epi16 with immediate 0 to be emitted as an
// <8 x i16> shufflevector with mask <0, 0, 0, 0, 4, 5, 6, 7>.
__m128i test_mm_shufflelo_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflelo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  return _mm_shufflelo_epi16(A, 0);
}
1125
// Expects _mm_sll_epi16 to be emitted as a call to the llvm.x86.sse2.psll.w
// intrinsic on <8 x i16> operands.
__m128i test_mm_sll_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sll_epi16(A, B);
}
1131
// Expects _mm_sll_epi32 to be emitted as a call to the llvm.x86.sse2.psll.d
// intrinsic on <4 x i32> operands.
__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sll_epi32(A, B);
}
1137
// Expects _mm_sll_epi64 to be emitted as a call to the llvm.x86.sse2.psll.q
// intrinsic on <2 x i64> operands.
__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_sll_epi64(A, B);
}
1143
// Expects _mm_slli_epi16 to be emitted as a call to the
// llvm.x86.sse2.pslli.w intrinsic with an i32 shift count.
__m128i test_mm_slli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi16(A, 1);
}
1149
// Expects _mm_slli_epi32 to be emitted as a call to the
// llvm.x86.sse2.pslli.d intrinsic with an i32 shift count.
__m128i test_mm_slli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi32(A, 1);
}
1155
// Expects _mm_slli_epi64 to be emitted as a call to the
// llvm.x86.sse2.pslli.q intrinsic with an i32 shift count.
__m128i test_mm_slli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi64(A, 1);
}
1161
// Expects _mm_slli_si128 with a byte count of 5 to be emitted as a
// <16 x i8> shufflevector whose mask runs from 11 through 26.
__m128i test_mm_slli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_slli_si128(A, 5);
}
1167
// Covers _mm_slli_si128 with a byte count of 17, which is larger than the
// 16-byte vector: expects a <16 x i8> shufflevector whose mask runs from 0
// through 15.
__m128i test_mm_slli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128_2
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  return _mm_slli_si128(A, 17);
}
1173
// Expects _mm_sqrt_pd to be emitted as a call to the llvm.x86.sse2.sqrt.pd
// intrinsic.
__m128d test_mm_sqrt_pd(__m128d A) {
  // CHECK-LABEL: test_mm_sqrt_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %{{.*}})
  return _mm_sqrt_pd(A);
}
1179
// Expects _mm_sqrt_sd to be emitted as a call to the llvm.x86.sse2.sqrt.sd
// intrinsic followed by two extract/insert pairs (lane 0, then lane 1) that
// assemble the <2 x double> result.
__m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sqrt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %{{.*}})
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_sqrt_sd(A, B);
}
1189
// Expects _mm_sra_epi16 to be emitted as a call to the llvm.x86.sse2.psra.w
// intrinsic on <8 x i16> operands.
__m128i test_mm_sra_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sra_epi16(A, B);
}
1195
// Expects _mm_sra_epi32 to be emitted as a call to the llvm.x86.sse2.psra.d
// intrinsic on <4 x i32> operands.
__m128i test_mm_sra_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sra_epi32(A, B);
}
1201
// Expects _mm_srai_epi16 to be emitted as a call to the
// llvm.x86.sse2.psrai.w intrinsic with an i32 shift count.
__m128i test_mm_srai_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi16(A, 1);
}
1207
// Expects _mm_srai_epi32 to be emitted as a call to the
// llvm.x86.sse2.psrai.d intrinsic with an i32 shift count.
__m128i test_mm_srai_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi32(A, 1);
}
1213
// Expects _mm_srl_epi16 to be emitted as a call to the llvm.x86.sse2.psrl.w
// intrinsic on <8 x i16> operands.
__m128i test_mm_srl_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_srl_epi16(A, B);
}
1219
// Expects _mm_srl_epi32 to be emitted as a call to the llvm.x86.sse2.psrl.d
// intrinsic on <4 x i32> operands.
__m128i test_mm_srl_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_srl_epi32(A, B);
}
1225
// Expects _mm_srl_epi64 to be emitted as a call to the llvm.x86.sse2.psrl.q
// intrinsic on <2 x i64> operands.
__m128i test_mm_srl_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_srl_epi64(A, B);
}
1231
// Expects _mm_srli_epi16 to be emitted as a call to the
// llvm.x86.sse2.psrli.w intrinsic with an i32 shift count.
__m128i test_mm_srli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi16(A, 1);
}
1237
// Expects _mm_srli_epi32 to be emitted as a call to the
// llvm.x86.sse2.psrli.d intrinsic with an i32 shift count.
__m128i test_mm_srli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi32(A, 1);
}
1243
// Expects _mm_srli_epi64 to be emitted as a call to the
// llvm.x86.sse2.psrli.q intrinsic with an i32 shift count.
__m128i test_mm_srli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi64(A, 1);
}
1249
// Expects _mm_srli_si128 with a byte count of 5 to be emitted as a
// <16 x i8> shufflevector whose mask runs from 5 through 20.
__m128i test_mm_srli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_srli_si128(A, 5);
}
1255
// Covers _mm_srli_si128 with a byte count of 17, which is larger than the
// 16-byte vector: expects a <16 x i8> shufflevector whose mask runs from 16
// through 31.
__m128i test_mm_srli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128_2
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  return _mm_srli_si128(A, 17);
}
1261
// Expects _mm_store_pd to be emitted as a 16-byte-aligned store of a
// <2 x double> value.
void test_mm_store_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_store_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store_pd(A, B);
}
1267
// Expects _mm_store_pd1 to be emitted as a shufflevector with an all-zero
// mask followed by a 16-byte-aligned <2 x double> store.
void test_mm_store_pd1(double *x, __m128d y) {
  // CHECK-LABEL: test_mm_store_pd1
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
  _mm_store_pd1(x, y);
}
1274
// Expects _mm_store_sd to extract lane 0 and store it as a double with
// alignment 1.
void test_mm_store_sd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_store_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_store_sd(A, B);
}
1281
// Expects _mm_store_si128 to be emitted as a 16-byte-aligned store of a
// <2 x i64> value.
void test_mm_store_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_store_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
  _mm_store_si128(A, B);
}
1287
// Expects _mm_store1_pd to be emitted as a shufflevector with an all-zero
// mask followed by a 16-byte-aligned <2 x double> store.
void test_mm_store1_pd(double *x, __m128d y) {
  // CHECK-LABEL: test_mm_store1_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store1_pd(x, y);
}
1294
// Expects _mm_storeh_pd to extract lane 1 and store it as a double with
// alignment 1.
void test_mm_storeh_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_storeh_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storeh_pd(A, B);
}
1301
// Expects _mm_storel_epi64 to extract lane 0 of a <2 x i64> and store it
// through an i64 pointer with alignment 1. The destination is passed as
// void * and the vector comes first in this wrapper's parameter list.
void test_mm_storel_epi64(__m128i x, void *y) {
  // CHECK-LABEL: test_mm_storel_epi64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
  _mm_storel_epi64(y, x);
}
1308
// Expects _mm_storel_pd to extract lane 0 and store it as a double with
// alignment 1.
void test_mm_storel_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_storel_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storel_pd(A, B);
}
1315
// Expects _mm_storer_pd to be emitted as a shufflevector with mask <1, 0>
// followed by a 16-byte-aligned <2 x double> store. The vector comes first
// in this wrapper's parameter list.
void test_mm_storer_pd(__m128d A, double *B) {
  // CHECK-LABEL: test_mm_storer_pd
  // CHECK: shufflevector <2 x double> {{.*}}, <2 x double> {{.*}}, <2 x i32> <i32 1, i32 0>
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}}
  _mm_storer_pd(B, A);
}
1322
// Expects _mm_storeu_pd to be emitted as a <2 x double> store with
// alignment 1, immediately followed by the function's return.
void test_mm_storeu_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_storeu_pd
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_pd(A, B);
}
1329
// Expects _mm_storeu_si128 to be emitted as a <2 x i64> store with
// alignment 1, immediately followed by the function's return.
void test_mm_storeu_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si128(A, B);
}
1336
// Expects _mm_stream_pd to be emitted as a 16-byte-aligned <2 x double>
// store carrying !nontemporal metadata.
void test_mm_stream_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_stream_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
  _mm_stream_pd(A, B);
}
1342
// Expects _mm_stream_si32 to be emitted as an i32 store with alignment 1
// carrying !nontemporal metadata.
void test_mm_stream_si32(int *A, int B) {
  // CHECK-LABEL: test_mm_stream_si32
  // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal
  _mm_stream_si32(A, B);
}
1348
// Expects _mm_stream_si64 to be emitted as an i64 store with alignment 1
// carrying !nontemporal metadata.
void test_mm_stream_si64(long long *A, long long B) {
  // CHECK-LABEL: test_mm_stream_si64
  // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal
  _mm_stream_si64(A, B);
}
1354
// Expects _mm_stream_si128 to be emitted as a 16-byte-aligned <2 x i64>
// store carrying !nontemporal metadata.
void test_mm_stream_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_stream_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal
  _mm_stream_si128(A, B);
}
1360
// Expects _mm_sub_epi8 to be emitted as a generic sub on <16 x i8>.
__m128i test_mm_sub_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi8
  // CHECK: sub <16 x i8>
  return _mm_sub_epi8(A, B);
}
1366
// Expects _mm_sub_epi16 to be emitted as a generic sub on <8 x i16>.
__m128i test_mm_sub_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi16
  // CHECK: sub <8 x i16>
  return _mm_sub_epi16(A, B);
}
1372
// Expects _mm_sub_epi32 to be emitted as a generic sub on <4 x i32>.
__m128i test_mm_sub_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi32
  // CHECK: sub <4 x i32>
  return _mm_sub_epi32(A, B);
}
1378
// Expects _mm_sub_epi64 to be emitted as a generic sub on <2 x i64>.
__m128i test_mm_sub_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi64
  // CHECK: sub <2 x i64>
  return _mm_sub_epi64(A, B);
}
1384
// Expects _mm_sub_pd to be emitted as a generic fsub on <2 x double>.
__m128d test_mm_sub_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_pd
  // CHECK: fsub <2 x double>
  return _mm_sub_pd(A, B);
}
1390
// Expects _mm_sub_sd to be emitted as two lane-0 extracts, a scalar fsub,
// and an insert of the difference back into lane 0.
__m128d test_mm_sub_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fsub double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_sub_sd(A, B);
}
1399
// Expects _mm_subs_epi8 to be emitted as a call to the llvm.x86.sse2.psubs.b
// intrinsic on <16 x i8> operands.
__m128i test_mm_subs_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epi8(A, B);
}
1405
// Expects _mm_subs_epi16 to be emitted as a call to the
// llvm.x86.sse2.psubs.w intrinsic on <8 x i16> operands.
__m128i test_mm_subs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epi16(A, B);
}
1411
// Expects _mm_subs_epu8 to be emitted as a call to the
// llvm.x86.sse2.psubus.b intrinsic on <16 x i8> operands.
__m128i test_mm_subs_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epu8(A, B);
}
1417
// Expects _mm_subs_epu16 to be emitted as a call to the
// llvm.x86.sse2.psubus.w intrinsic on <8 x i16> operands.
__m128i test_mm_subs_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epu16(A, B);
}
1423
// Expects _mm_ucomieq_sd to be emitted as a call to the
// llvm.x86.sse2.ucomieq.sd intrinsic returning i32.
int test_mm_ucomieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomieq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomieq_sd(A, B);
}
1429
// Expects _mm_ucomige_sd to be emitted as a call to the
// llvm.x86.sse2.ucomige.sd intrinsic returning i32.
int test_mm_ucomige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomige_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomige_sd(A, B);
}
1435
// Expects _mm_ucomigt_sd to be emitted as a call to the
// llvm.x86.sse2.ucomigt.sd intrinsic returning i32.
int test_mm_ucomigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomigt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomigt_sd(A, B);
}
1441
// Expects _mm_ucomile_sd to be emitted as a call to the
// llvm.x86.sse2.ucomile.sd intrinsic returning i32.
int test_mm_ucomile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomile_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomile_sd(A, B);
}
1447
// Expects _mm_ucomilt_sd to be emitted as a call to the
// llvm.x86.sse2.ucomilt.sd intrinsic returning i32.
int test_mm_ucomilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomilt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomilt_sd(A, B);
}
1453
// Expects _mm_ucomineq_sd to be emitted as a call to the
// llvm.x86.sse2.ucomineq.sd intrinsic returning i32.
int test_mm_ucomineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomineq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomineq_sd(A, B);
}
1459
test_mm_undefined_pd()1460 __m128d test_mm_undefined_pd() {
1461 // CHECK-LABEL: @test_mm_undefined_pd
1462 // CHECK: ret <2 x double> undef
1463 return _mm_undefined_pd();
1464 }
1465
test_mm_undefined_si128()1466 __m128i test_mm_undefined_si128() {
1467 // CHECK-LABEL: @test_mm_undefined_si128
1468 // CHECK: ret <2 x i64> undef
1469 return _mm_undefined_si128();
1470 }
1471
// Expects _mm_unpackhi_epi8 to be emitted as a <16 x i8> shufflevector that
// interleaves indices 8..15 of the first operand with 24..31 of the second.
__m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  return _mm_unpackhi_epi8(A, B);
}
1477
// Exercises _mm_unpackhi_epi16. The FileCheck directives below expect an
// <8 x i16> shufflevector whose mask interleaves lanes 4..7 of the first
// operand with lanes 4..7 of the second (mask indices 12..15).
__m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  return _mm_unpackhi_epi16(A, B);
}
1483
// Exercises _mm_unpackhi_epi32. The FileCheck directives below expect a
// <4 x i32> shufflevector whose mask interleaves lanes 2..3 of the first
// operand with lanes 2..3 of the second (mask indices 6..7).
__m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_epi32(A, B);
}
1489
// Exercises _mm_unpackhi_epi64. The FileCheck directives below expect a
// <2 x i64> shufflevector that selects lane 1 of each operand (mask 1, 3).
__m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_epi64(A, B);
}
1495
// Exercises _mm_unpackhi_pd. The FileCheck directives below expect a
// <2 x double> shufflevector that selects lane 1 of each operand (mask 1, 3).
__m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpackhi_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_pd(A, B);
}
1501
// Exercises _mm_unpacklo_epi8. The FileCheck directives below expect a
// <16 x i8> shufflevector whose mask interleaves lanes 0..7 of the first
// operand with lanes 0..7 of the second (mask indices 16..23).
__m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  return _mm_unpacklo_epi8(A, B);
}
1507
// Exercises _mm_unpacklo_epi16. The FileCheck directives below expect an
// <8 x i16> shufflevector whose mask interleaves lanes 0..3 of the first
// operand with lanes 0..3 of the second (mask indices 8..11).
__m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  return _mm_unpacklo_epi16(A, B);
}
1513
// Exercises _mm_unpacklo_epi32. The FileCheck directives below expect a
// <4 x i32> shufflevector whose mask interleaves lanes 0..1 of the first
// operand with lanes 0..1 of the second (mask indices 4..5).
__m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_epi32(A, B);
}
1519
// Exercises _mm_unpacklo_epi64. The FileCheck directives below expect a
// <2 x i64> shufflevector that selects lane 0 of each operand (mask 0, 2).
__m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_epi64(A, B);
}
1525
// Exercises _mm_unpacklo_pd. The FileCheck directives below expect a
// <2 x double> shufflevector that selects lane 0 of each operand (mask 0, 2).
__m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpacklo_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_pd(A, B);
}
1531
// Exercises _mm_xor_pd. The FileCheck directives below expect the emitted IR
// to perform the xor on <4 x i32> values, i.e. as an integer vector operation
// rather than on the <2 x double> type of the arguments.
__m128d test_mm_xor_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_xor_pd
  // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
  return _mm_xor_pd(A, B);
}
1537
// Exercises _mm_xor_si128. The FileCheck directives below expect the emitted
// IR to contain an xor of two <2 x i64> values.
__m128i test_mm_xor_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_xor_si128
  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
  return _mm_xor_si128(A, B);
}
1543