// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Werror | FileCheck %s
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s

// Each test_* function below wraps one SSE2 intrinsic; the FileCheck
// directives inside it describe the LLVM IR expected for that call.

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <x86intrin.h>

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

// _mm_add_epi8 is expected to become a plain <16 x i8> vector add.
__m128i test_mm_add_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi8
  // CHECK: add <16 x i8>
  return _mm_add_epi8(A, B);
}

// _mm_add_epi16 is expected to become a plain <8 x i16> vector add.
__m128i test_mm_add_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi16
  // CHECK: add <8 x i16>
  return _mm_add_epi16(A, B);
}

// _mm_add_epi32 is expected to become a plain <4 x i32> vector add.
__m128i test_mm_add_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi32
  // CHECK: add <4 x i32>
  return _mm_add_epi32(A, B);
}

// _mm_add_epi64 is expected to become a plain <2 x i64> vector add.
__m128i test_mm_add_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi64
  // CHECK: add <2 x i64>
  return _mm_add_epi64(A, B);
}

// _mm_add_pd is expected to become a <2 x double> fadd.
__m128d test_mm_add_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_pd
  // CHECK: fadd <2 x double>
  return _mm_add_pd(A, B);
}

// _mm_add_sd is expected to extract element 0 of both operands, add them as
// scalar doubles, and insert the sum back into element 0.
__m128d test_mm_add_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fadd double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_add_sd(A, B);
}

// _mm_adds_epi8 is expected to call the llvm.x86.sse2.padds.b intrinsic.
__m128i test_mm_adds_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_adds_epi8(A, B);
}

// _mm_adds_epi16 is expected to call the llvm.x86.sse2.padds.w intrinsic.
__m128i test_mm_adds_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_adds_epi16(A, B);
}

// _mm_adds_epu8 is expected to call the llvm.x86.sse2.paddus.b intrinsic.
__m128i test_mm_adds_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_adds_epu8(A, B);
}

// _mm_adds_epu16 is expected to call the llvm.x86.sse2.paddus.w intrinsic.
__m128i test_mm_adds_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_adds_epu16(A, B);
}

// _mm_and_pd is expected to be emitted as an integer `and` on <4 x i32>.
__m128d test_mm_and_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_and_pd
  // CHECK: and <4 x i32>
  return _mm_and_pd(A, B);
}

// _mm_and_si128 is expected to be emitted as an `and` on <2 x i64>.
__m128i test_mm_and_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_and_si128
  // CHECK: and <2 x i64>
  return _mm_and_si128(A, B);
}

// _mm_andnot_pd is expected to be an xor with all-ones followed by an `and`,
// both on <4 x i32>.
__m128d test_mm_andnot_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_andnot_pd
  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
  // CHECK: and <4 x i32>
  return _mm_andnot_pd(A, B);
}

// _mm_andnot_si128 is expected to be an xor with all-ones followed by an
// `and`, both on <2 x i64>.
__m128i test_mm_andnot_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_andnot_si128
  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
  // CHECK: and <2 x i64>
  return _mm_andnot_si128(A, B);
}

// _mm_avg_epu8 is expected to call the llvm.x86.sse2.pavg.b intrinsic.
__m128i test_mm_avg_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_avg_epu8(A, B);
}

// _mm_avg_epu16 is expected to call the llvm.x86.sse2.pavg.w intrinsic.
__m128i test_mm_avg_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_avg_epu16(A, B);
}

// _mm_bslli_si128 with a byte count of 5 is expected to become a single
// <16 x i8> shufflevector selecting indices 11 through 26.
__m128i test_mm_bslli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bslli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_bslli_si128(A, 5);
}

// _mm_bsrli_si128 with a byte count of 5 is expected to become a single
// <16 x i8> shufflevector selecting indices 5 through 20.
__m128i test_mm_bsrli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bsrli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_bsrli_si128(A, 5);
}

// _mm_castpd_ps is expected to be a bitcast from <2 x double> to <4 x float>.
__m128 test_mm_castpd_ps(__m128d A) {
  // CHECK-LABEL: test_mm_castpd_ps
  // CHECK: bitcast <2 x double> %{{.*}} to <4 x float>
  return _mm_castpd_ps(A);
}

// _mm_castpd_si128 is expected to be a bitcast from <2 x double> to <2 x i64>.
__m128i test_mm_castpd_si128(__m128d A) {
  // CHECK-LABEL: test_mm_castpd_si128
  // CHECK: bitcast <2 x double> %{{.*}} to <2 x i64>
  return _mm_castpd_si128(A);
}

// _mm_castps_pd is expected to be a bitcast from <4 x float> to <2 x double>.
__m128d test_mm_castps_pd(__m128 A) {
  // CHECK-LABEL: test_mm_castps_pd
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x double>
  return _mm_castps_pd(A);
}

// _mm_castps_si128 is expected to be a bitcast from <4 x float> to <2 x i64>.
__m128i test_mm_castps_si128(__m128 A) {
  // CHECK-LABEL: test_mm_castps_si128
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
  return _mm_castps_si128(A);
}

// _mm_castsi128_pd is expected to be a bitcast from <2 x i64> to <2 x double>.
__m128d test_mm_castsi128_pd(__m128i A) {
  // CHECK-LABEL: test_mm_castsi128_pd
  // CHECK: bitcast <2 x i64> %{{.*}} to <2 x double>
  return _mm_castsi128_pd(A);
}

// _mm_castsi128_ps is expected to be a bitcast from <2 x i64> to <4 x float>.
__m128 test_mm_castsi128_ps(__m128i A) {
  // CHECK-LABEL: test_mm_castsi128_ps
  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
  return _mm_castsi128_ps(A);
}

// _mm_clflush is expected to call llvm.x86.sse2.clflush with an i8* argument.
void test_mm_clflush(void* A) {
  // CHECK-LABEL: test_mm_clflush
  // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}})
  _mm_clflush(A);
}

// _mm_cmpeq_epi8 is expected to become an `icmp eq` on <16 x i8>.
__m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi8
  // CHECK: icmp eq <16 x i8>
  return _mm_cmpeq_epi8(A, B);
}

// _mm_cmpeq_epi16 is expected to become an `icmp eq` on <8 x i16>.
__m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi16
  // CHECK: icmp eq <8 x i16>
  return _mm_cmpeq_epi16(A, B);
}

// _mm_cmpeq_epi32 is expected to become an `icmp eq` on <4 x i32>.
__m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi32
  // CHECK: icmp eq <4 x i32>
  return _mm_cmpeq_epi32(A, B);
}

// _mm_cmpeq_pd is expected to be an `fcmp oeq` whose <2 x i1> result is
// sign-extended to <2 x i64>, bitcast to <2 x double>, and returned.
__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_pd
  // CHECK:         [[CMP:%.*]] = fcmp oeq <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpeq_pd(A, B);
}

// _mm_cmpeq_sd is expected to call llvm.x86.sse2.cmp.sd with predicate 0.
__m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_sd(A, B);
}

// _mm_cmpge_pd is expected to be an `fcmp ole` (the pattern does not pin the
// operand order) followed by sext, bitcast to <2 x double>, and ret.
__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_pd
  // CHECK:         [[CMP:%.*]] = fcmp ole <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpge_pd(A, B);
}

// _mm_cmpge_sd is expected to call llvm.x86.sse2.cmp.sd with predicate 2 and
// then rebuild the result vector from an element 0 and an element 1.
__m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpge_sd(A, B);
}

// _mm_cmpgt_epi8 is expected to become an `icmp sgt` on <16 x i8>.
__m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmpgt_epi8(A, B);
}

// _mm_cmpgt_epi16 is expected to become an `icmp sgt` on <8 x i16>.
__m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmpgt_epi16(A, B);
}

// _mm_cmpgt_epi32 is expected to become an `icmp sgt` on <4 x i32>.
__m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmpgt_epi32(A, B);
}

// _mm_cmpgt_pd is expected to be an `fcmp olt` (the pattern does not pin the
// operand order) followed by sext, bitcast to <2 x double>, and ret.
__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_pd
  // CHECK:         [[CMP:%.*]] = fcmp olt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpgt_pd(A, B);
}

// _mm_cmpgt_sd is expected to call llvm.x86.sse2.cmp.sd with predicate 1 and
// then rebuild the result vector from an element 0 and an element 1.
__m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpgt_sd(A, B);
}

// _mm_cmple_pd is expected to be an `fcmp ole` followed by sext, bitcast to
// <2 x double>, and ret.
__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_pd
  // CHECK:         [[CMP:%.*]] = fcmp ole <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmple_pd(A, B);
}

// _mm_cmple_sd is expected to call llvm.x86.sse2.cmp.sd with predicate 2.
__m128d test_mm_cmple_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_sd(A, B);
}

// _mm_cmplt_epi8 is expected to be emitted as an `icmp sgt` on <16 x i8>
// (the pattern does not pin the operand order).
__m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmplt_epi8(A, B);
}

// _mm_cmplt_epi16 is expected to be emitted as an `icmp sgt` on <8 x i16>
// (the pattern does not pin the operand order).
__m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmplt_epi16(A, B);
}

// _mm_cmplt_epi32 is expected to be emitted as an `icmp sgt` on <4 x i32>
// (the pattern does not pin the operand order).
__m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmplt_epi32(A, B);
}

// _mm_cmplt_pd is expected to be an `fcmp olt` followed by sext, bitcast to
// <2 x double>, and ret.
__m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_pd
  // CHECK:         [[CMP:%.*]] = fcmp olt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmplt_pd(A, B);
}

// _mm_cmplt_sd is expected to call llvm.x86.sse2.cmp.sd with predicate 1.
__m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_sd(A, B);
}

// _mm_cmpneq_pd is expected to be an `fcmp une` followed by sext, bitcast to
// <2 x double>, and ret.
__m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_pd
  // CHECK:         [[CMP:%.*]] = fcmp une <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpneq_pd(A, B);
}

// _mm_cmpneq_sd is expected to call llvm.x86.sse2.cmp.sd with predicate 4.
__m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_sd(A, B);
}

// _mm_cmpnge_pd is expected to be an `fcmp ugt` (the pattern does not pin the
// operand order) followed by sext, bitcast to <2 x double>, and ret.
__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_pd
  // CHECK:         [[CMP:%.*]] = fcmp ugt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnge_pd(A, B);
}

// _mm_cmpnge_sd is expected to call llvm.x86.sse2.cmp.sd with predicate 6 and
// then rebuild the result vector from an element 0 and an element 1.
__m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpnge_sd(A, B);
}

// _mm_cmpngt_pd is expected to be an `fcmp uge` (the pattern does not pin the
// operand order) followed by sext, bitcast to <2 x double>, and ret.
__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_pd
  // CHECK:         [[CMP:%.*]] = fcmp uge <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpngt_pd(A, B);
}

// _mm_cmpngt_sd is expected to call llvm.x86.sse2.cmp.sd with predicate 5 and
// then rebuild the result vector from an element 0 and an element 1.
__m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpngt_sd(A, B);
}

// _mm_cmpnle_pd is expected to be an `fcmp ugt` followed by sext, bitcast to
// <2 x double>, and ret.
__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_pd
  // CHECK:         [[CMP:%.*]] = fcmp ugt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnle_pd(A, B);
}

// _mm_cmpnle_sd is expected to call llvm.x86.sse2.cmp.sd with predicate 6.
__m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_sd(A, B);
}

// _mm_cmpnlt_pd is expected to be an `fcmp uge` followed by sext, bitcast to
// <2 x double>, and ret.
__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_pd
  // CHECK:         [[CMP:%.*]] = fcmp uge <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnlt_pd(A, B);
}

// _mm_cmpnlt_sd is expected to call llvm.x86.sse2.cmp.sd with predicate 5.
__m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_sd(A, B);
}

// _mm_cmpord_pd is expected to be an `fcmp ord` followed by sext, bitcast to
// <2 x double>, and ret.
__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_pd
  // CHECK:         [[CMP:%.*]] = fcmp ord <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpord_pd(A, B);
}

// _mm_cmpord_sd is expected to call llvm.x86.sse2.cmp.sd with predicate 7.
__m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_sd(A, B);
}

// _mm_cmpunord_pd is expected to be an `fcmp uno` followed by sext, bitcast
// to <2 x double>, and ret.
__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_pd
  // CHECK:         [[CMP:%.*]] = fcmp uno <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpunord_pd(A, B);
}

// _mm_cmpunord_sd is expected to call llvm.x86.sse2.cmp.sd with predicate 3.
__m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_sd(A, B);
}

// _mm_comieq_sd is expected to call llvm.x86.sse2.comieq.sd and return i32.
int test_mm_comieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comieq_sd
  // CHECK: call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comieq_sd(A, B);
}

// _mm_comige_sd is expected to call llvm.x86.sse2.comige.sd and return i32.
int test_mm_comige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comige_sd
  // CHECK: call i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comige_sd(A, B);
}

// _mm_comigt_sd is expected to call llvm.x86.sse2.comigt.sd and return i32.
int test_mm_comigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comigt_sd
  // CHECK: call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comigt_sd(A, B);
}

// _mm_comile_sd is expected to call llvm.x86.sse2.comile.sd and return i32.
int test_mm_comile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comile_sd
  // CHECK: call i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comile_sd(A, B);
}

// _mm_comilt_sd is expected to call llvm.x86.sse2.comilt.sd and return i32.
int test_mm_comilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comilt_sd
  // CHECK: call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comilt_sd(A, B);
}

// _mm_comineq_sd is expected to call llvm.x86.sse2.comineq.sd and return i32.
int test_mm_comineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comineq_sd
  // CHECK: call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comineq_sd(A, B);
}

// _mm_cvtepi32_pd is expected to shuffle out elements 0 and 1 of the
// <4 x i32> input and convert them with sitofp to <2 x double>.
__m128d test_mm_cvtepi32_pd(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_pd
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
  return _mm_cvtepi32_pd(A);
}

// _mm_cvtepi32_ps is expected to call the llvm.x86.sse2.cvtdq2ps intrinsic.
__m128 test_mm_cvtepi32_ps(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_ps
  // CHECK: call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %{{.*}})
  return _mm_cvtepi32_ps(A);
}

// _mm_cvtpd_epi32 is expected to call the llvm.x86.sse2.cvtpd2dq intrinsic.
__m128i test_mm_cvtpd_epi32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %{{.*}})
  return _mm_cvtpd_epi32(A);
}

// _mm_cvtpd_ps is expected to call the llvm.x86.sse2.cvtpd2ps intrinsic.
__m128 test_mm_cvtpd_ps(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_ps
  // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %{{.*}})
  return _mm_cvtpd_ps(A);
}

// _mm_cvtps_epi32 is expected to call the llvm.x86.sse2.cvtps2dq intrinsic.
__m128i test_mm_cvtps_epi32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtps_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
  return _mm_cvtps_epi32(A);
}

// _mm_cvtps_pd is expected to shuffle out elements 0 and 1 of the
// <4 x float> input and widen them with fpext to <2 x double>.
__m128d test_mm_cvtps_pd(__m128 A) {
  // CHECK-LABEL: test_mm_cvtps_pd
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
  return _mm_cvtps_pd(A);
}

// _mm_cvtsd_f64 is expected to extract element 0 of the <2 x double> input.
double test_mm_cvtsd_f64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_f64
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  return _mm_cvtsd_f64(A);
}

// _mm_cvtsd_si32 is expected to call the llvm.x86.sse2.cvtsd2si intrinsic.
int test_mm_cvtsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si32
  // CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}})
  return _mm_cvtsd_si32(A);
}

// _mm_cvtsd_si64 is expected to call the llvm.x86.sse2.cvtsd2si64 intrinsic.
long long test_mm_cvtsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si64
  // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
  return _mm_cvtsd_si64(A);
}

// _mm_cvtsd_ss is expected to contain an fptrunc from double to float.
__m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
  // CHECK-LABEL: test_mm_cvtsd_ss
  // CHECK: fptrunc double %{{.*}} to float
  return _mm_cvtsd_ss(A, B);
}

// _mm_cvtsi128_si32 is expected to extract element 0 of a <4 x i32> vector.
int test_mm_cvtsi128_si32(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si32
  // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
  return _mm_cvtsi128_si32(A);
}

// _mm_cvtsi128_si64 is expected to extract element 0 of a <2 x i64> vector.
long long test_mm_cvtsi128_si64(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  return _mm_cvtsi128_si64(A);
}

// _mm_cvtsi32_sd is expected to convert the i32 with sitofp and insert the
// double into element 0.
__m128d test_mm_cvtsi32_sd(__m128d A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_sd
  // CHECK: sitofp i32 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi32_sd(A, B);
}

// _mm_cvtsi32_si128 is expected to insert the argument into element 0 of a
// <4 x i32> vector and constant 0 into elements 1 through 3.
__m128i test_mm_cvtsi32_si128(int A) {
  // CHECK-LABEL: test_mm_cvtsi32_si128
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
  return _mm_cvtsi32_si128(A);
}

// _mm_cvtsi64_sd is expected to convert the i64 with sitofp and insert the
// double into element 0.
__m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_sd
  // CHECK: sitofp i64 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi64_sd(A, B);
}

// _mm_cvtsi64_si128 is expected to insert the argument into element 0 of a
// <2 x i64> vector and constant 0 into element 1.
__m128i test_mm_cvtsi64_si128(long long A) {
  // CHECK-LABEL: test_mm_cvtsi64_si128
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
  return _mm_cvtsi64_si128(A);
}

// _mm_cvtss_sd is expected to extract float element 0, widen it with fpext,
// and insert the double into element 0.
__m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
  // CHECK-LABEL: test_mm_cvtss_sd
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fpext float %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtss_sd(A, B);
}

// _mm_cvttpd_epi32 is expected to call the llvm.x86.sse2.cvttpd2dq intrinsic.
__m128i test_mm_cvttpd_epi32(__m128d A) {
  // CHECK-LABEL: test_mm_cvttpd_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}})
  return _mm_cvttpd_epi32(A);
}

// _mm_cvttps_epi32 is expected to be an fptosi from <4 x float> to <4 x i32>.
__m128i test_mm_cvttps_epi32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttps_epi32
  // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32>
  return _mm_cvttps_epi32(A);
}

// _mm_cvttsd_si32 is expected to extract element 0 and convert it with
// fptosi to i32.
int test_mm_cvttsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvttsd_si32
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fptosi double %{{.*}} to i32
  return _mm_cvttsd_si32(A);
}

// _mm_cvttsd_si64 is expected to extract element 0 and convert it with
// fptosi to i64.
long long test_mm_cvttsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvttsd_si64
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fptosi double %{{.*}} to i64
  return _mm_cvttsd_si64(A);
}

// _mm_div_pd is expected to become a <2 x double> fdiv.
__m128d test_mm_div_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_div_pd
  // CHECK: fdiv <2 x double>
  return _mm_div_pd(A, B);
}

// _mm_div_sd is expected to extract element 0 of both operands, divide them
// as scalar doubles, and insert the quotient back into element 0.
__m128d test_mm_div_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_div_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fdiv double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_div_sd(A, B);
}

605 // Lowering to pextrw requires optimization.
test_mm_extract_epi16(__m128i A)606 int test_mm_extract_epi16(__m128i A) {
607   // CHECK-LABEL: test_mm_extract_epi16
608   // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
609   // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
610   // CHECK: zext i16 %{{.*}} to i32
611   return _mm_extract_epi16(A, 9);
612 }
613 
test_mm_insert_epi16(__m128i A,int B)614 __m128i test_mm_insert_epi16(__m128i A, int B) {
615   // CHECK-LABEL: test_mm_insert_epi16
616   // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
617   // CHECK: insertelement <8 x i16> %{{.*}}, i32 [[x]]
618   return _mm_insert_epi16(A, B, 8);
619 }
620 
// _mm_lfence is expected to call the llvm.x86.sse2.lfence intrinsic.
// Declared with (void) so the definition carries a prototype.
void test_mm_lfence(void) {
  // CHECK-LABEL: test_mm_lfence
  // CHECK: call void @llvm.x86.sse2.lfence()
  _mm_lfence();
}

// _mm_load_pd is expected to be a <2 x double> load with 16-byte alignment.
__m128d test_mm_load_pd(double const* A) {
  // CHECK-LABEL: test_mm_load_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
  return _mm_load_pd(A);
}

// _mm_load_pd1 is expected to load one double (align 8) and insert it into
// both elements of the result.
__m128d test_mm_load_pd1(double const* A) {
  // CHECK-LABEL: test_mm_load_pd1
  // CHECK: load double, double* %{{.*}}, align 8
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_load_pd1(A);
}

// _mm_load_sd is expected to load a double with alignment exactly 1.
__m128d test_mm_load_sd(double const* A) {
  // CHECK-LABEL: test_mm_load_sd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  return _mm_load_sd(A);
}

// _mm_load_si128 is expected to be a <2 x i64> load with 16-byte alignment.
__m128i test_mm_load_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_load_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
  return _mm_load_si128(A);
}

// _mm_load1_pd is expected to load one double (align 8) and insert it into
// both elements of the result.
__m128d test_mm_load1_pd(double const* A) {
  // CHECK-LABEL: test_mm_load1_pd
  // CHECK: load double, double* %{{.*}}, align 8
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_load1_pd(A);
}

// _mm_loadh_pd is expected to load a double with alignment exactly 1 and
// insert it into element 1.
__m128d test_mm_loadh_pd(__m128d x, void* y) {
  // CHECK-LABEL: test_mm_loadh_pd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_loadh_pd(x, y);
}

// _mm_loadl_epi64 is expected to load an i64 with alignment exactly 1, insert
// it into element 0, and insert constant 0 into element 1.
// The function-name directive uses the LABEL form like every other test here,
// so a mismatch is reported against this function.
__m128i test_mm_loadl_epi64(__m128i* y) {
  // CHECK-LABEL: test_mm_loadl_epi64
  // CHECK: load i64, i64* {{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x i64> undef, i64 {{.*}}, i32 0
  // CHECK: insertelement <2 x i64> {{.*}}, i64 0, i32 1
  return _mm_loadl_epi64(y);
}

// _mm_loadl_pd is expected to load a double with alignment exactly 1 into
// element 0 and carry element 1 over via extractelement/insertelement.
__m128d test_mm_loadl_pd(__m128d x, void* y) {
  // CHECK-LABEL: test_mm_loadl_pd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_loadl_pd(x, y);
}

// _mm_loadr_pd is expected to be a 16-byte-aligned <2 x double> load followed
// by a shufflevector that swaps the two elements.
__m128d test_mm_loadr_pd(double const* A) {
  // CHECK-LABEL: test_mm_loadr_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
  return _mm_loadr_pd(A);
}

// _mm_loadu_pd is expected to be a <2 x double> load with alignment exactly 1.
__m128d test_mm_loadu_pd(double const* A) {
  // CHECK-LABEL: test_mm_loadu_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
  return _mm_loadu_pd(A);
}

// _mm_loadu_si128 is expected to be a <2 x i64> load with alignment exactly 1.
__m128i test_mm_loadu_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_loadu_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
  return _mm_loadu_si128(A);
}

// _mm_loadu_si64 is expected to load an i64 with alignment exactly 1, insert
// it into element 0, and insert constant 0 into element 1.
__m128i test_mm_loadu_si64(void const* A) {
  // CHECK-LABEL: test_mm_loadu_si64
  // CHECK: load i64, i64* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
  return _mm_loadu_si64(A);
}

// _mm_madd_epi16 is expected to call the llvm.x86.sse2.pmadd.wd intrinsic.
__m128i test_mm_madd_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_madd_epi16
  // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_madd_epi16(A, B);
}

// _mm_maskmoveu_si128 is expected to call the llvm.x86.sse2.maskmov.dqu
// intrinsic with two <16 x i8> operands and an i8* destination.
void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
  // CHECK-LABEL: test_mm_maskmoveu_si128
  // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}})
  _mm_maskmoveu_si128(A, B, C);
}

// _mm_max_epi16 is expected to be an `icmp sgt` on <8 x i16> immediately
// followed by a select between the same two operands.
__m128i test_mm_max_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_max_epi16
  // CHECK:       [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
  return _mm_max_epi16(A, B);
}

// _mm_max_epu8 is expected to be an `icmp ugt` on <16 x i8> immediately
// followed by a select between the same two operands.
__m128i test_mm_max_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_max_epu8
  // CHECK:       [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
  return _mm_max_epu8(A, B);
}

// _mm_max_pd is expected to call the llvm.x86.sse2.max.pd intrinsic.
__m128d test_mm_max_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_pd(A, B);
}

// _mm_max_sd is expected to call the llvm.x86.sse2.max.sd intrinsic.
__m128d test_mm_max_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_sd(A, B);
}

// _mm_mfence: expects a call to the llvm.x86.sse2.mfence intrinsic.
void test_mm_mfence(void) {
  // CHECK-LABEL: test_mm_mfence
  // CHECK: call void @llvm.x86.sse2.mfence()
  _mm_mfence();
}

// _mm_min_epi16: expects a signed less-than compare immediately followed by a select.
__m128i test_mm_min_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epi16
  // CHECK:       [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
  return _mm_min_epi16(A, B);
}

// _mm_min_epu8: expects an unsigned less-than compare immediately followed by a select.
__m128i test_mm_min_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epu8
  // CHECK:       [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
  return _mm_min_epu8(A, B);
}

// _mm_min_pd: expects a call to the llvm.x86.sse2.min.pd intrinsic.
__m128d test_mm_min_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_pd(A, B);
}

// _mm_min_sd: expects a call to the llvm.x86.sse2.min.sd intrinsic.
__m128d test_mm_min_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_sd(A, B);
}

// _mm_move_epi64: expects a two-source shufflevector selecting elements <0, 2>.
__m128i test_mm_move_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_move_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_move_epi64(A);
}

// _mm_move_sd: expects the result to be rebuilt from an extracted element 0 and an
// extracted element 1 via two insertelement operations.
__m128d test_mm_move_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_move_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_move_sd(A, B);
}

// _mm_movemask_epi8: expects a call to the llvm.x86.sse2.pmovmskb.128 intrinsic.
int test_mm_movemask_epi8(__m128i A) {
  // CHECK-LABEL: test_mm_movemask_epi8
  // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
  return _mm_movemask_epi8(A);
}

// _mm_movemask_pd: expects a call to the llvm.x86.sse2.movmsk.pd intrinsic.
int test_mm_movemask_pd(__m128d A) {
  // CHECK-LABEL: test_mm_movemask_pd
  // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
  return _mm_movemask_pd(A);
}

// _mm_mul_epu32: expects a call to the llvm.x86.sse2.pmulu.dq intrinsic.
__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mul_epu32
  // CHECK: call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_mul_epu32(A, B);
}

// _mm_mul_pd: expects a plain vector fmul on <2 x double>.
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_pd
  // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
  return _mm_mul_pd(A, B);
}

// _mm_mul_sd: expects element 0 of both operands to be extracted, multiplied as
// scalars, and the product inserted back at element 0.
__m128d test_mm_mul_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fmul double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_mul_sd(A, B);
}

// _mm_mulhi_epi16: expects a call to the llvm.x86.sse2.pmulh.w intrinsic.
__m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epi16(A, B);
}

// _mm_mulhi_epu16: expects a call to the llvm.x86.sse2.pmulhu.w intrinsic.
__m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epu16(A, B);
}

// _mm_mullo_epi16: expects a plain vector mul on <8 x i16>.
__m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mullo_epi16
  // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
  return _mm_mullo_epi16(A, B);
}

// _mm_or_pd: expects a bitwise or performed on <4 x i32> vectors.
__m128d test_mm_or_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_or_pd
  // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
  return _mm_or_pd(A, B);
}

// _mm_or_si128: expects a bitwise or performed on <2 x i64> vectors.
__m128i test_mm_or_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_or_si128
  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
  return _mm_or_si128(A, B);
}

// _mm_packs_epi16: expects a call to the llvm.x86.sse2.packsswb.128 intrinsic.
__m128i test_mm_packs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packs_epi16(A, B);
}

// _mm_packs_epi32: expects a call to the llvm.x86.sse2.packssdw.128 intrinsic.
__m128i test_mm_packs_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi32
  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_packs_epi32(A, B);
}

// _mm_packus_epi16: expects a call to the llvm.x86.sse2.packuswb.128 intrinsic.
__m128i test_mm_packus_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packus_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packus_epi16(A, B);
}

// _mm_pause: expects a call to the llvm.x86.sse2.pause intrinsic.
// The intrinsic is invoked as a plain statement; the function returns nothing.
void test_mm_pause(void) {
  // CHECK-LABEL: test_mm_pause
  // CHECK: call void @llvm.x86.sse2.pause()
  _mm_pause();
}

// _mm_sad_epu8: expects a call to the llvm.x86.sse2.psad.bw intrinsic.
__m128i test_mm_sad_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sad_epu8
  // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_sad_epu8(A, B);
}

// _mm_set_epi8: expects the vector to be built by sixteen insertelement operations,
// one per lane index 0..15, starting from undef.
__m128i test_mm_set_epi8(char A, char B, char C, char D,
                         char E, char F, char G, char H,
                         char I, char J, char K, char L,
                         char M, char N, char O, char P) {
  // CHECK-LABEL: test_mm_set_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
}

// _mm_set_epi16: expects the vector to be built by eight insertelement operations,
// one per lane index 0..7, starting from undef.
__m128i test_mm_set_epi16(short A, short B, short C, short D,
                          short E, short F, short G, short H) {
  // CHECK-LABEL: test_mm_set_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_set_epi16(A, B, C, D, E, F, G, H);
}

// _mm_set_epi32: expects the vector to be built by four insertelement operations,
// one per lane index 0..3, starting from undef.
__m128i test_mm_set_epi32(int A, int B, int C, int D) {
  // CHECK-LABEL: test_mm_set_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_set_epi32(A, B, C, D);
}

// _mm_set_epi64: expects the vector to be built by two insertelement operations
// (lane 0, then lane 1), starting from undef.
__m128i test_mm_set_epi64(__m64 A, __m64 B) {
  // CHECK-LABEL: test_mm_set_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set_epi64(A, B);
}

// _mm_set_epi64x: expects the vector to be built by two insertelement operations
// (lane 0, then lane 1), starting from undef.
__m128i test_mm_set_epi64x(long long A, long long B) {
  // CHECK-LABEL: test_mm_set_epi64x
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set_epi64x(A, B);
}

// _mm_set_pd: expects the vector to be built by two insertelement operations
// (lane 0, then lane 1), starting from undef.
__m128d test_mm_set_pd(double A, double B) {
  // CHECK-LABEL: test_mm_set_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set_pd(A, B);
}

// _mm_set_sd: expects the argument inserted at lane 0 and the constant 0.0 at lane 1.
__m128d test_mm_set_sd(double A) {
  // CHECK-LABEL: test_mm_set_sd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
  return _mm_set_sd(A);
}

// _mm_set1_epi8: expects the vector to be built by sixteen insertelement operations,
// one per lane index 0..15, starting from undef.
__m128i test_mm_set1_epi8(char A) {
  // CHECK-LABEL: test_mm_set1_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_set1_epi8(A);
}

// _mm_set1_epi16: expects the vector to be built by eight insertelement operations,
// one per lane index 0..7, starting from undef.
__m128i test_mm_set1_epi16(short A) {
  // CHECK-LABEL: test_mm_set1_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_set1_epi16(A);
}

// _mm_set1_epi32: expects the vector to be built by four insertelement operations,
// one per lane index 0..3, starting from undef.
__m128i test_mm_set1_epi32(int A) {
  // CHECK-LABEL: test_mm_set1_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_set1_epi32(A);
}

// _mm_set1_epi64: expects the vector to be built by two insertelement operations
// (lane 0, then lane 1), starting from undef.
__m128i test_mm_set1_epi64(__m64 A) {
  // CHECK-LABEL: test_mm_set1_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set1_epi64(A);
}

// _mm_set1_epi64x: expects the vector to be built by two insertelement operations
// (lane 0, then lane 1), starting from undef.
__m128i test_mm_set1_epi64x(long long A) {
  // CHECK-LABEL: test_mm_set1_epi64x
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set1_epi64x(A);
}

// _mm_set1_pd: expects the vector to be built by two insertelement operations
// (lane 0, then lane 1), starting from undef.
__m128d test_mm_set1_pd(double A) {
  // CHECK-LABEL: test_mm_set1_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set1_pd(A);
}

// _mm_setr_epi8: expects the vector to be built by sixteen insertelement operations,
// one per lane index 0..15, starting from undef.
__m128i test_mm_setr_epi8(char A, char B, char C, char D,
                          char E, char F, char G, char H,
                          char I, char J, char K, char L,
                          char M, char N, char O, char P) {
  // CHECK-LABEL: test_mm_setr_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
}

// _mm_setr_epi16: expects the vector to be built by eight insertelement operations,
// one per lane index 0..7, starting from undef.
__m128i test_mm_setr_epi16(short A, short B, short C, short D,
                           short E, short F, short G, short H) {
  // CHECK-LABEL: test_mm_setr_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_setr_epi16(A, B, C, D, E, F, G, H);
}

// _mm_setr_epi32: expects the vector to be built by four insertelement operations,
// one per lane index 0..3, starting from undef.
__m128i test_mm_setr_epi32(int A, int B, int C, int D) {
  // CHECK-LABEL: test_mm_setr_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_setr_epi32(A, B, C, D);
}

// _mm_setr_epi64: expects the vector to be built by two insertelement operations
// (lane 0, then lane 1), starting from undef.
__m128i test_mm_setr_epi64(__m64 A, __m64 B) {
  // CHECK-LABEL: test_mm_setr_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_setr_epi64(A, B);
}

// _mm_setr_pd: expects the vector to be built by two insertelement operations
// (lane 0, then lane 1), starting from undef.
__m128d test_mm_setr_pd(double A, double B) {
  // CHECK-LABEL: test_mm_setr_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_setr_pd(A, B);
}

// _mm_setzero_pd: expects a store of a zeroinitializer <2 x double>.
__m128d test_mm_setzero_pd(void) {
  // CHECK-LABEL: test_mm_setzero_pd
  // CHECK: store <2 x double> zeroinitializer
  return _mm_setzero_pd();
}

// _mm_setzero_si128: expects a store of a zeroinitializer <2 x i64>.
__m128i test_mm_setzero_si128(void) {
  // CHECK-LABEL: test_mm_setzero_si128
  // CHECK: store <2 x i64> zeroinitializer
  return _mm_setzero_si128();
}

// _mm_shuffle_epi32 with immediate 0: expects a shufflevector with an all-zero mask.
__m128i test_mm_shuffle_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_shuffle_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
  return _mm_shuffle_epi32(A, 0);
}

// _mm_shuffle_pd with immediate 1: expects a shufflevector with mask <1, 2>.
__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_shuffle_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
  return _mm_shuffle_pd(A, B, 1);
}

// _mm_shufflehi_epi16 with immediate 0: expects a shufflevector that keeps lanes 0..3
// and fills lanes 4..7 from lane 4.
__m128i test_mm_shufflehi_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflehi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  return _mm_shufflehi_epi16(A, 0);
}

// _mm_shufflelo_epi16 with immediate 0: expects a shufflevector that fills lanes 0..3
// from lane 0 and keeps lanes 4..7.
__m128i test_mm_shufflelo_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflelo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  return _mm_shufflelo_epi16(A, 0);
}

// _mm_sll_epi16: expects a call to the llvm.x86.sse2.psll.w intrinsic.
__m128i test_mm_sll_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sll_epi16(A, B);
}

// _mm_sll_epi32: expects a call to the llvm.x86.sse2.psll.d intrinsic.
__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sll_epi32(A, B);
}

// _mm_sll_epi64: expects a call to the llvm.x86.sse2.psll.q intrinsic.
__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_sll_epi64(A, B);
}

// _mm_slli_epi16 with count 1: expects a call to the llvm.x86.sse2.pslli.w intrinsic.
__m128i test_mm_slli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi16(A, 1);
}

// _mm_slli_epi32 with count 1: expects a call to the llvm.x86.sse2.pslli.d intrinsic.
__m128i test_mm_slli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi32(A, 1);
}

// _mm_slli_epi64 with count 1: expects a call to the llvm.x86.sse2.pslli.q intrinsic.
__m128i test_mm_slli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi64(A, 1);
}

// _mm_slli_si128 by 5 bytes: expects a two-source byte shufflevector with indices 11..26.
__m128i test_mm_slli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_slli_si128(A, 5);
}

// _mm_slli_si128 by 17 bytes (more than the vector width): expects a byte
// shufflevector whose indices 0..15 all select from the first operand.
__m128i test_mm_slli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128_2
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  return _mm_slli_si128(A, 17);
}

// _mm_sqrt_pd: expects a call to the llvm.x86.sse2.sqrt.pd intrinsic.
__m128d test_mm_sqrt_pd(__m128d A) {
  // CHECK-LABEL: test_mm_sqrt_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %{{.*}})
  return _mm_sqrt_pd(A);
}

// _mm_sqrt_sd: expects a call to the llvm.x86.sse2.sqrt.sd intrinsic, after which the
// result is rebuilt from an extracted element 0 and an extracted element 1.
__m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sqrt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %{{.*}})
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_sqrt_sd(A, B);
}

// _mm_sra_epi16: expects a call to the llvm.x86.sse2.psra.w intrinsic.
__m128i test_mm_sra_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sra_epi16(A, B);
}

// _mm_sra_epi32: expects a call to the llvm.x86.sse2.psra.d intrinsic.
__m128i test_mm_sra_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sra_epi32(A, B);
}

// _mm_srai_epi16 with count 1: expects a call to the llvm.x86.sse2.psrai.w intrinsic.
__m128i test_mm_srai_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi16(A, 1);
}

// _mm_srai_epi32 with count 1: expects a call to the llvm.x86.sse2.psrai.d intrinsic.
__m128i test_mm_srai_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi32(A, 1);
}

// _mm_srl_epi16: expects a call to the llvm.x86.sse2.psrl.w intrinsic.
__m128i test_mm_srl_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_srl_epi16(A, B);
}

// _mm_srl_epi32: expects a call to the llvm.x86.sse2.psrl.d intrinsic.
__m128i test_mm_srl_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_srl_epi32(A, B);
}

// _mm_srl_epi64: expects a call to the llvm.x86.sse2.psrl.q intrinsic.
__m128i test_mm_srl_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_srl_epi64(A, B);
}

// _mm_srli_epi16 with count 1: expects a call to the llvm.x86.sse2.psrli.w intrinsic.
__m128i test_mm_srli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi16(A, 1);
}

// _mm_srli_epi32 with count 1: expects a call to the llvm.x86.sse2.psrli.d intrinsic.
__m128i test_mm_srli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi32(A, 1);
}

// _mm_srli_epi64 with count 1: expects a call to the llvm.x86.sse2.psrli.q intrinsic.
__m128i test_mm_srli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi64(A, 1);
}

// _mm_srli_si128 by 5 bytes: expects a two-source byte shufflevector with indices 5..20.
__m128i test_mm_srli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_srli_si128(A, 5);
}

// _mm_srli_si128 by 17 bytes (more than the vector width): expects a byte
// shufflevector whose indices 16..31 all select from the second operand.
__m128i test_mm_srli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128_2
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  return _mm_srli_si128(A, 17);
}

// _mm_store_pd: expects a <2 x double> store with 16-byte alignment.
void test_mm_store_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_store_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store_pd(A, B);
}

// _mm_store_pd1: expects a shufflevector with an all-zero mask followed by a
// 16-byte-aligned <2 x double> store.
void test_mm_store_pd1(double* x, __m128d y) {
  // CHECK-LABEL: test_mm_store_pd1
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
  _mm_store_pd1(x, y);
}

// _mm_store_sd: expects element 0 to be extracted and stored as a double with alignment 1.
void test_mm_store_sd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_store_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_store_sd(A, B);
}

// _mm_store_si128: expects a <2 x i64> store with 16-byte alignment.
void test_mm_store_si128(__m128i* A, __m128i B) {
  // CHECK-LABEL: test_mm_store_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
  _mm_store_si128(A, B);
}

// _mm_store1_pd: expects a shufflevector with an all-zero mask followed by a
// 16-byte-aligned <2 x double> store.
void test_mm_store1_pd(double* x, __m128d y) {
  // CHECK-LABEL: test_mm_store1_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store1_pd(x, y);
}

// _mm_storeh_pd: expects element 1 to be extracted and stored as a double with alignment 1.
void test_mm_storeh_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storeh_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storeh_pd(A, B);
}

// _mm_storel_epi64: expects element 0 to be extracted and stored as an i64 with alignment 1.
// Note the wrapper takes (vector, pointer) while the intrinsic takes (pointer, vector).
void test_mm_storel_epi64(__m128i x, void* y) {
  // CHECK-LABEL: test_mm_storel_epi64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
  _mm_storel_epi64(y, x);
}

// _mm_storel_pd: expects element 0 to be extracted and stored as a double with alignment 1.
void test_mm_storel_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storel_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storel_pd(A, B);
}

// _mm_storer_pd: expects a lane-swapping shufflevector (mask <1, 0>) followed by a
// 16-byte-aligned <2 x double> store.
// Note the wrapper takes (vector, pointer) while the intrinsic takes (pointer, vector).
void test_mm_storer_pd(__m128d A, double* B) {
  // CHECK-LABEL: test_mm_storer_pd
  // CHECK: shufflevector <2 x double> {{.*}}, <2 x double> {{.*}}, <2 x i32> <i32 1, i32 0>
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}}
  _mm_storer_pd(B, A);
}

// _mm_storeu_pd: expects a <2 x double> store with alignment 1, immediately followed
// by the function's return.
void test_mm_storeu_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storeu_pd
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_pd(A, B);
}

// _mm_storeu_si128: expects a <2 x i64> store with alignment 1, immediately followed
// by the function's return.
void test_mm_storeu_si128(__m128i* A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si128(A, B);
}

// _mm_stream_pd: expects a 16-byte-aligned <2 x double> store tagged !nontemporal.
void test_mm_stream_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_stream_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
  _mm_stream_pd(A, B);
}

// _mm_stream_si32: expects an i32 store with alignment 1 tagged !nontemporal.
void test_mm_stream_si32(int *A, int B) {
  // CHECK-LABEL: test_mm_stream_si32
  // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal
  _mm_stream_si32(A, B);
}

// _mm_stream_si64: expects an i64 store with alignment 1 tagged !nontemporal.
void test_mm_stream_si64(long long *A, long long B) {
  // CHECK-LABEL: test_mm_stream_si64
  // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal
  _mm_stream_si64(A, B);
}

// _mm_stream_si128: expects a 16-byte-aligned <2 x i64> store tagged !nontemporal.
void test_mm_stream_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_stream_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal
  _mm_stream_si128(A, B);
}

// _mm_sub_epi8: expects a plain vector sub on <16 x i8>.
__m128i test_mm_sub_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi8
  // CHECK: sub <16 x i8>
  return _mm_sub_epi8(A, B);
}

// _mm_sub_epi16: expects a plain vector sub on <8 x i16>.
__m128i test_mm_sub_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi16
  // CHECK: sub <8 x i16>
  return _mm_sub_epi16(A, B);
}

// _mm_sub_epi32: expects a plain vector sub on <4 x i32>.
__m128i test_mm_sub_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi32
  // CHECK: sub <4 x i32>
  return _mm_sub_epi32(A, B);
}

// _mm_sub_epi64: expects a plain vector sub on <2 x i64>.
__m128i test_mm_sub_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi64
  // CHECK: sub <2 x i64>
  return _mm_sub_epi64(A, B);
}

// _mm_sub_pd: expects a plain vector fsub on <2 x double>.
__m128d test_mm_sub_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_pd
  // CHECK: fsub <2 x double>
  return _mm_sub_pd(A, B);
}

// _mm_sub_sd: expects element 0 of both operands to be extracted, subtracted as
// scalars, and the difference inserted back at element 0.
__m128d test_mm_sub_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fsub double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_sub_sd(A, B);
}

// _mm_subs_epi8: expects a call to the llvm.x86.sse2.psubs.b intrinsic.
__m128i test_mm_subs_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epi8(A, B);
}

// _mm_subs_epi16: expects a call to the llvm.x86.sse2.psubs.w intrinsic.
__m128i test_mm_subs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epi16(A, B);
}

// _mm_subs_epu8: expects a call to the llvm.x86.sse2.psubus.b intrinsic.
__m128i test_mm_subs_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epu8(A, B);
}

// _mm_subs_epu16: expects a call to the llvm.x86.sse2.psubus.w intrinsic.
__m128i test_mm_subs_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epu16(A, B);
}

// _mm_ucomieq_sd: expects a call to the llvm.x86.sse2.ucomieq.sd intrinsic.
int test_mm_ucomieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomieq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomieq_sd(A, B);
}

// Exercises _mm_ucomige_sd; the emitted IR is expected to call
// @llvm.x86.sse2.ucomige.sd on two <2 x double> operands and yield an i32.
int test_mm_ucomige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomige_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomige_sd(A, B);
}
1435 
// Exercises _mm_ucomigt_sd; the emitted IR is expected to call
// @llvm.x86.sse2.ucomigt.sd on two <2 x double> operands and yield an i32.
int test_mm_ucomigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomigt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomigt_sd(A, B);
}
1441 
// Exercises _mm_ucomile_sd; the emitted IR is expected to call
// @llvm.x86.sse2.ucomile.sd on two <2 x double> operands and yield an i32.
int test_mm_ucomile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomile_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomile_sd(A, B);
}
1447 
// Exercises _mm_ucomilt_sd; the emitted IR is expected to call
// @llvm.x86.sse2.ucomilt.sd on two <2 x double> operands and yield an i32.
int test_mm_ucomilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomilt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomilt_sd(A, B);
}
1453 
// Exercises _mm_ucomineq_sd; the emitted IR is expected to call
// @llvm.x86.sse2.ucomineq.sd on two <2 x double> operands and yield an i32.
int test_mm_ucomineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomineq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomineq_sd(A, B);
}
1459 
// Exercises _mm_undefined_pd; the emitted IR is expected to return
// `<2 x double> undef`. Declared with (void) so the definition is a proper
// C prototype rather than an unprototyped `()` declarator.
__m128d test_mm_undefined_pd(void) {
  // CHECK-LABEL: @test_mm_undefined_pd
  // CHECK: ret <2 x double> undef
  return _mm_undefined_pd();
}
1465 
// Exercises _mm_undefined_si128; the emitted IR is expected to return
// `<2 x i64> undef`. Declared with (void) so the definition is a proper
// C prototype rather than an unprototyped `()` declarator.
__m128i test_mm_undefined_si128(void) {
  // CHECK-LABEL: @test_mm_undefined_si128
  // CHECK: ret <2 x i64> undef
  return _mm_undefined_si128();
}
1471 
// Exercises _mm_unpackhi_epi8; the emitted IR is expected to be a
// <16 x i8> shufflevector whose mask alternates indices 8..15 with 24..31.
__m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  return _mm_unpackhi_epi8(A, B);
}
1477 
// Exercises _mm_unpackhi_epi16; the emitted IR is expected to be an
// <8 x i16> shufflevector whose mask alternates indices 4..7 with 12..15.
__m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  return _mm_unpackhi_epi16(A, B);
}
1483 
// Exercises _mm_unpackhi_epi32; the emitted IR is expected to be a
// <4 x i32> shufflevector with mask <2, 6, 3, 7>.
__m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_epi32(A, B);
}
1489 
// Exercises _mm_unpackhi_epi64; the emitted IR is expected to be a
// <2 x i64> shufflevector with mask <1, 3>.
__m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_epi64(A, B);
}
1495 
// Exercises _mm_unpackhi_pd; the emitted IR is expected to be a
// <2 x double> shufflevector with mask <1, 3>.
__m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpackhi_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_pd(A, B);
}
1501 
// Exercises _mm_unpacklo_epi8; the emitted IR is expected to be a
// <16 x i8> shufflevector whose mask alternates indices 0..7 with 16..23.
__m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  return _mm_unpacklo_epi8(A, B);
}
1507 
// Exercises _mm_unpacklo_epi16; the emitted IR is expected to be an
// <8 x i16> shufflevector whose mask alternates indices 0..3 with 8..11.
__m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  return _mm_unpacklo_epi16(A, B);
}
1513 
// Exercises _mm_unpacklo_epi32; the emitted IR is expected to be a
// <4 x i32> shufflevector with mask <0, 4, 1, 5>.
__m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_epi32(A, B);
}
1519 
// Exercises _mm_unpacklo_epi64; the emitted IR is expected to be a
// <2 x i64> shufflevector with mask <0, 2>.
__m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_epi64(A, B);
}
1525 
// Exercises _mm_unpacklo_pd; the emitted IR is expected to be a
// <2 x double> shufflevector with mask <0, 2>.
__m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpacklo_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_pd(A, B);
}
1531 
// Exercises _mm_xor_pd; the emitted IR is expected to contain an integer
// `xor` performed on <4 x i32> values.
__m128d test_mm_xor_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_xor_pd
  // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
  return _mm_xor_pd(A, B);
}
1537 
// Exercises _mm_xor_si128; the emitted IR is expected to contain an
// `xor` performed on <2 x i64> values.
__m128i test_mm_xor_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_xor_si128
  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
  return _mm_xor_si128(A, B);
}
1543