• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23 
24 #ifndef __EMMINTRIN_H
25 #define __EMMINTRIN_H
26 
27 #include <xmmintrin.h>
28 
/* Public 16-byte vector types: two doubles, and two long longs. */
typedef double    __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Internal 16-byte element views used for casts inside the intrinsics. */
typedef double    __v2df  __attribute__((__vector_size__(16)));
typedef long long __v2di  __attribute__((__vector_size__(16)));
typedef short     __v8hi  __attribute__((__vector_size__(16)));
typedef char      __v16qi __attribute__((__vector_size__(16)));

/* Explicitly signed counterpart of __v16qi.  It is an implementation detail
 * and is not meant to show up in the public interface. */
typedef signed char __v16qs __attribute__((__vector_size__(16)));
41 
42 #include <f16cintrin.h>
43 
/* Attribute set shared by the functions defined in this file: always inline,
 * no debug info, and the "sse2" target feature. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
46 
47 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_add_sd(__m128d __a,__m128d __b)48 _mm_add_sd(__m128d __a, __m128d __b)
49 {
50   __a[0] += __b[0];
51   return __a;
52 }
53 
54 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_add_pd(__m128d __a,__m128d __b)55 _mm_add_pd(__m128d __a, __m128d __b)
56 {
57   return __a + __b;
58 }
59 
60 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_sub_sd(__m128d __a,__m128d __b)61 _mm_sub_sd(__m128d __a, __m128d __b)
62 {
63   __a[0] -= __b[0];
64   return __a;
65 }
66 
67 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_sub_pd(__m128d __a,__m128d __b)68 _mm_sub_pd(__m128d __a, __m128d __b)
69 {
70   return __a - __b;
71 }
72 
73 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mul_sd(__m128d __a,__m128d __b)74 _mm_mul_sd(__m128d __a, __m128d __b)
75 {
76   __a[0] *= __b[0];
77   return __a;
78 }
79 
80 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mul_pd(__m128d __a,__m128d __b)81 _mm_mul_pd(__m128d __a, __m128d __b)
82 {
83   return __a * __b;
84 }
85 
86 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_div_sd(__m128d __a,__m128d __b)87 _mm_div_sd(__m128d __a, __m128d __b)
88 {
89   __a[0] /= __b[0];
90   return __a;
91 }
92 
93 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_div_pd(__m128d __a,__m128d __b)94 _mm_div_pd(__m128d __a, __m128d __b)
95 {
96   return __a / __b;
97 }
98 
99 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_sqrt_sd(__m128d __a,__m128d __b)100 _mm_sqrt_sd(__m128d __a, __m128d __b)
101 {
102   __m128d __c = __builtin_ia32_sqrtsd(__b);
103   return (__m128d) { __c[0], __a[1] };
104 }
105 
106 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_sqrt_pd(__m128d __a)107 _mm_sqrt_pd(__m128d __a)
108 {
109   return __builtin_ia32_sqrtpd(__a);
110 }
111 
112 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_min_sd(__m128d __a,__m128d __b)113 _mm_min_sd(__m128d __a, __m128d __b)
114 {
115   return __builtin_ia32_minsd(__a, __b);
116 }
117 
118 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_min_pd(__m128d __a,__m128d __b)119 _mm_min_pd(__m128d __a, __m128d __b)
120 {
121   return __builtin_ia32_minpd(__a, __b);
122 }
123 
124 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_max_sd(__m128d __a,__m128d __b)125 _mm_max_sd(__m128d __a, __m128d __b)
126 {
127   return __builtin_ia32_maxsd(__a, __b);
128 }
129 
130 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_max_pd(__m128d __a,__m128d __b)131 _mm_max_pd(__m128d __a, __m128d __b)
132 {
133   return __builtin_ia32_maxpd(__a, __b);
134 }
135 
136 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_and_pd(__m128d __a,__m128d __b)137 _mm_and_pd(__m128d __a, __m128d __b)
138 {
139   return (__m128d)((__v4si)__a & (__v4si)__b);
140 }
141 
142 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_andnot_pd(__m128d __a,__m128d __b)143 _mm_andnot_pd(__m128d __a, __m128d __b)
144 {
145   return (__m128d)(~(__v4si)__a & (__v4si)__b);
146 }
147 
148 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_or_pd(__m128d __a,__m128d __b)149 _mm_or_pd(__m128d __a, __m128d __b)
150 {
151   return (__m128d)((__v4si)__a | (__v4si)__b);
152 }
153 
154 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_xor_pd(__m128d __a,__m128d __b)155 _mm_xor_pd(__m128d __a, __m128d __b)
156 {
157   return (__m128d)((__v4si)__a ^ (__v4si)__b);
158 }
159 
160 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpeq_pd(__m128d __a,__m128d __b)161 _mm_cmpeq_pd(__m128d __a, __m128d __b)
162 {
163   return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
164 }
165 
166 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmplt_pd(__m128d __a,__m128d __b)167 _mm_cmplt_pd(__m128d __a, __m128d __b)
168 {
169   return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
170 }
171 
172 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmple_pd(__m128d __a,__m128d __b)173 _mm_cmple_pd(__m128d __a, __m128d __b)
174 {
175   return (__m128d)__builtin_ia32_cmplepd(__a, __b);
176 }
177 
178 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpgt_pd(__m128d __a,__m128d __b)179 _mm_cmpgt_pd(__m128d __a, __m128d __b)
180 {
181   return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
182 }
183 
184 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpge_pd(__m128d __a,__m128d __b)185 _mm_cmpge_pd(__m128d __a, __m128d __b)
186 {
187   return (__m128d)__builtin_ia32_cmplepd(__b, __a);
188 }
189 
190 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpord_pd(__m128d __a,__m128d __b)191 _mm_cmpord_pd(__m128d __a, __m128d __b)
192 {
193   return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
194 }
195 
196 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpunord_pd(__m128d __a,__m128d __b)197 _mm_cmpunord_pd(__m128d __a, __m128d __b)
198 {
199   return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
200 }
201 
202 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpneq_pd(__m128d __a,__m128d __b)203 _mm_cmpneq_pd(__m128d __a, __m128d __b)
204 {
205   return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
206 }
207 
208 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnlt_pd(__m128d __a,__m128d __b)209 _mm_cmpnlt_pd(__m128d __a, __m128d __b)
210 {
211   return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
212 }
213 
214 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnle_pd(__m128d __a,__m128d __b)215 _mm_cmpnle_pd(__m128d __a, __m128d __b)
216 {
217   return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
218 }
219 
220 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpngt_pd(__m128d __a,__m128d __b)221 _mm_cmpngt_pd(__m128d __a, __m128d __b)
222 {
223   return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
224 }
225 
226 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnge_pd(__m128d __a,__m128d __b)227 _mm_cmpnge_pd(__m128d __a, __m128d __b)
228 {
229   return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
230 }
231 
232 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpeq_sd(__m128d __a,__m128d __b)233 _mm_cmpeq_sd(__m128d __a, __m128d __b)
234 {
235   return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
236 }
237 
238 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmplt_sd(__m128d __a,__m128d __b)239 _mm_cmplt_sd(__m128d __a, __m128d __b)
240 {
241   return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
242 }
243 
244 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmple_sd(__m128d __a,__m128d __b)245 _mm_cmple_sd(__m128d __a, __m128d __b)
246 {
247   return (__m128d)__builtin_ia32_cmplesd(__a, __b);
248 }
249 
250 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpgt_sd(__m128d __a,__m128d __b)251 _mm_cmpgt_sd(__m128d __a, __m128d __b)
252 {
253   __m128d __c = __builtin_ia32_cmpltsd(__b, __a);
254   return (__m128d) { __c[0], __a[1] };
255 }
256 
257 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpge_sd(__m128d __a,__m128d __b)258 _mm_cmpge_sd(__m128d __a, __m128d __b)
259 {
260   __m128d __c = __builtin_ia32_cmplesd(__b, __a);
261   return (__m128d) { __c[0], __a[1] };
262 }
263 
264 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpord_sd(__m128d __a,__m128d __b)265 _mm_cmpord_sd(__m128d __a, __m128d __b)
266 {
267   return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
268 }
269 
270 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpunord_sd(__m128d __a,__m128d __b)271 _mm_cmpunord_sd(__m128d __a, __m128d __b)
272 {
273   return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
274 }
275 
276 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpneq_sd(__m128d __a,__m128d __b)277 _mm_cmpneq_sd(__m128d __a, __m128d __b)
278 {
279   return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
280 }
281 
282 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnlt_sd(__m128d __a,__m128d __b)283 _mm_cmpnlt_sd(__m128d __a, __m128d __b)
284 {
285   return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
286 }
287 
288 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnle_sd(__m128d __a,__m128d __b)289 _mm_cmpnle_sd(__m128d __a, __m128d __b)
290 {
291   return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
292 }
293 
294 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpngt_sd(__m128d __a,__m128d __b)295 _mm_cmpngt_sd(__m128d __a, __m128d __b)
296 {
297   __m128d __c = __builtin_ia32_cmpnltsd(__b, __a);
298   return (__m128d) { __c[0], __a[1] };
299 }
300 
301 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnge_sd(__m128d __a,__m128d __b)302 _mm_cmpnge_sd(__m128d __a, __m128d __b)
303 {
304   __m128d __c = __builtin_ia32_cmpnlesd(__b, __a);
305   return (__m128d) { __c[0], __a[1] };
306 }
307 
308 static __inline__ int __DEFAULT_FN_ATTRS
_mm_comieq_sd(__m128d __a,__m128d __b)309 _mm_comieq_sd(__m128d __a, __m128d __b)
310 {
311   return __builtin_ia32_comisdeq(__a, __b);
312 }
313 
314 static __inline__ int __DEFAULT_FN_ATTRS
_mm_comilt_sd(__m128d __a,__m128d __b)315 _mm_comilt_sd(__m128d __a, __m128d __b)
316 {
317   return __builtin_ia32_comisdlt(__a, __b);
318 }
319 
320 static __inline__ int __DEFAULT_FN_ATTRS
_mm_comile_sd(__m128d __a,__m128d __b)321 _mm_comile_sd(__m128d __a, __m128d __b)
322 {
323   return __builtin_ia32_comisdle(__a, __b);
324 }
325 
326 static __inline__ int __DEFAULT_FN_ATTRS
_mm_comigt_sd(__m128d __a,__m128d __b)327 _mm_comigt_sd(__m128d __a, __m128d __b)
328 {
329   return __builtin_ia32_comisdgt(__a, __b);
330 }
331 
332 static __inline__ int __DEFAULT_FN_ATTRS
_mm_comige_sd(__m128d __a,__m128d __b)333 _mm_comige_sd(__m128d __a, __m128d __b)
334 {
335   return __builtin_ia32_comisdge(__a, __b);
336 }
337 
338 static __inline__ int __DEFAULT_FN_ATTRS
_mm_comineq_sd(__m128d __a,__m128d __b)339 _mm_comineq_sd(__m128d __a, __m128d __b)
340 {
341   return __builtin_ia32_comisdneq(__a, __b);
342 }
343 
344 static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomieq_sd(__m128d __a,__m128d __b)345 _mm_ucomieq_sd(__m128d __a, __m128d __b)
346 {
347   return __builtin_ia32_ucomisdeq(__a, __b);
348 }
349 
350 static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomilt_sd(__m128d __a,__m128d __b)351 _mm_ucomilt_sd(__m128d __a, __m128d __b)
352 {
353   return __builtin_ia32_ucomisdlt(__a, __b);
354 }
355 
356 static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomile_sd(__m128d __a,__m128d __b)357 _mm_ucomile_sd(__m128d __a, __m128d __b)
358 {
359   return __builtin_ia32_ucomisdle(__a, __b);
360 }
361 
362 static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomigt_sd(__m128d __a,__m128d __b)363 _mm_ucomigt_sd(__m128d __a, __m128d __b)
364 {
365   return __builtin_ia32_ucomisdgt(__a, __b);
366 }
367 
368 static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomige_sd(__m128d __a,__m128d __b)369 _mm_ucomige_sd(__m128d __a, __m128d __b)
370 {
371   return __builtin_ia32_ucomisdge(__a, __b);
372 }
373 
374 static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomineq_sd(__m128d __a,__m128d __b)375 _mm_ucomineq_sd(__m128d __a, __m128d __b)
376 {
377   return __builtin_ia32_ucomisdneq(__a, __b);
378 }
379 
380 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtpd_ps(__m128d __a)381 _mm_cvtpd_ps(__m128d __a)
382 {
383   return __builtin_ia32_cvtpd2ps(__a);
384 }
385 
386 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtps_pd(__m128 __a)387 _mm_cvtps_pd(__m128 __a)
388 {
389   return __builtin_ia32_cvtps2pd(__a);
390 }
391 
392 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtepi32_pd(__m128i __a)393 _mm_cvtepi32_pd(__m128i __a)
394 {
395   return __builtin_ia32_cvtdq2pd((__v4si)__a);
396 }
397 
398 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtpd_epi32(__m128d __a)399 _mm_cvtpd_epi32(__m128d __a)
400 {
401   return __builtin_ia32_cvtpd2dq(__a);
402 }
403 
404 static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvtsd_si32(__m128d __a)405 _mm_cvtsd_si32(__m128d __a)
406 {
407   return __builtin_ia32_cvtsd2si(__a);
408 }
409 
410 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtsd_ss(__m128 __a,__m128d __b)411 _mm_cvtsd_ss(__m128 __a, __m128d __b)
412 {
413   __a[0] = __b[0];
414   return __a;
415 }
416 
417 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtsi32_sd(__m128d __a,int __b)418 _mm_cvtsi32_sd(__m128d __a, int __b)
419 {
420   __a[0] = __b;
421   return __a;
422 }
423 
424 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtss_sd(__m128d __a,__m128 __b)425 _mm_cvtss_sd(__m128d __a, __m128 __b)
426 {
427   __a[0] = __b[0];
428   return __a;
429 }
430 
431 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvttpd_epi32(__m128d __a)432 _mm_cvttpd_epi32(__m128d __a)
433 {
434   return (__m128i)__builtin_ia32_cvttpd2dq(__a);
435 }
436 
437 static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttsd_si32(__m128d __a)438 _mm_cvttsd_si32(__m128d __a)
439 {
440   return __a[0];
441 }
442 
443 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvtpd_pi32(__m128d __a)444 _mm_cvtpd_pi32(__m128d __a)
445 {
446   return (__m64)__builtin_ia32_cvtpd2pi(__a);
447 }
448 
449 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvttpd_pi32(__m128d __a)450 _mm_cvttpd_pi32(__m128d __a)
451 {
452   return (__m64)__builtin_ia32_cvttpd2pi(__a);
453 }
454 
455 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtpi32_pd(__m64 __a)456 _mm_cvtpi32_pd(__m64 __a)
457 {
458   return __builtin_ia32_cvtpi2pd((__v2si)__a);
459 }
460 
461 static __inline__ double __DEFAULT_FN_ATTRS
_mm_cvtsd_f64(__m128d __a)462 _mm_cvtsd_f64(__m128d __a)
463 {
464   return __a[0];
465 }
466 
467 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_load_pd(double const * __dp)468 _mm_load_pd(double const *__dp)
469 {
470   return *(__m128d*)__dp;
471 }
472 
473 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_load1_pd(double const * __dp)474 _mm_load1_pd(double const *__dp)
475 {
476   struct __mm_load1_pd_struct {
477     double __u;
478   } __attribute__((__packed__, __may_alias__));
479   double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
480   return (__m128d){ __u, __u };
481 }
482 
/* Alternative spelling of _mm_load1_pd. */
#define _mm_load_pd1(dp) _mm_load1_pd(dp)
484 
485 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_loadr_pd(double const * __dp)486 _mm_loadr_pd(double const *__dp)
487 {
488   __m128d __u = *(__m128d*)__dp;
489   return __builtin_shufflevector(__u, __u, 1, 0);
490 }
491 
492 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_loadu_pd(double const * __dp)493 _mm_loadu_pd(double const *__dp)
494 {
495   struct __loadu_pd {
496     __m128d __v;
497   } __attribute__((__packed__, __may_alias__));
498   return ((struct __loadu_pd*)__dp)->__v;
499 }
500 
501 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_load_sd(double const * __dp)502 _mm_load_sd(double const *__dp)
503 {
504   struct __mm_load_sd_struct {
505     double __u;
506   } __attribute__((__packed__, __may_alias__));
507   double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
508   return (__m128d){ __u, 0 };
509 }
510 
511 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_loadh_pd(__m128d __a,double const * __dp)512 _mm_loadh_pd(__m128d __a, double const *__dp)
513 {
514   struct __mm_loadh_pd_struct {
515     double __u;
516   } __attribute__((__packed__, __may_alias__));
517   double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
518   return (__m128d){ __a[0], __u };
519 }
520 
521 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_loadl_pd(__m128d __a,double const * __dp)522 _mm_loadl_pd(__m128d __a, double const *__dp)
523 {
524   struct __mm_loadl_pd_struct {
525     double __u;
526   } __attribute__((__packed__, __may_alias__));
527   double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
528   return (__m128d){ __u, __a[1] };
529 }
530 
531 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_undefined_pd()532 _mm_undefined_pd()
533 {
534   return (__m128d)__builtin_ia32_undef128();
535 }
536 
537 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_set_sd(double __w)538 _mm_set_sd(double __w)
539 {
540   return (__m128d){ __w, 0 };
541 }
542 
543 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_set1_pd(double __w)544 _mm_set1_pd(double __w)
545 {
546   return (__m128d){ __w, __w };
547 }
548 
549 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_set_pd(double __w,double __x)550 _mm_set_pd(double __w, double __x)
551 {
552   return (__m128d){ __x, __w };
553 }
554 
555 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_setr_pd(double __w,double __x)556 _mm_setr_pd(double __w, double __x)
557 {
558   return (__m128d){ __w, __x };
559 }
560 
561 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_setzero_pd(void)562 _mm_setzero_pd(void)
563 {
564   return (__m128d){ 0, 0 };
565 }
566 
567 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_move_sd(__m128d __a,__m128d __b)568 _mm_move_sd(__m128d __a, __m128d __b)
569 {
570   return (__m128d){ __b[0], __a[1] };
571 }
572 
573 static __inline__ void __DEFAULT_FN_ATTRS
_mm_store_sd(double * __dp,__m128d __a)574 _mm_store_sd(double *__dp, __m128d __a)
575 {
576   struct __mm_store_sd_struct {
577     double __u;
578   } __attribute__((__packed__, __may_alias__));
579   ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
580 }
581 
582 static __inline__ void __DEFAULT_FN_ATTRS
_mm_store1_pd(double * __dp,__m128d __a)583 _mm_store1_pd(double *__dp, __m128d __a)
584 {
585   struct __mm_store1_pd_struct {
586     double __u[2];
587   } __attribute__((__packed__, __may_alias__));
588   ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
589   ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
590 }
591 
592 static __inline__ void __DEFAULT_FN_ATTRS
_mm_store_pd(double * __dp,__m128d __a)593 _mm_store_pd(double *__dp, __m128d __a)
594 {
595   *(__m128d *)__dp = __a;
596 }
597 
598 static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_pd(double * __dp,__m128d __a)599 _mm_storeu_pd(double *__dp, __m128d __a)
600 {
601   __builtin_ia32_storeupd(__dp, __a);
602 }
603 
604 static __inline__ void __DEFAULT_FN_ATTRS
_mm_storer_pd(double * __dp,__m128d __a)605 _mm_storer_pd(double *__dp, __m128d __a)
606 {
607   __a = __builtin_shufflevector(__a, __a, 1, 0);
608   *(__m128d *)__dp = __a;
609 }
610 
611 static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeh_pd(double * __dp,__m128d __a)612 _mm_storeh_pd(double *__dp, __m128d __a)
613 {
614   struct __mm_storeh_pd_struct {
615     double __u;
616   } __attribute__((__packed__, __may_alias__));
617   ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
618 }
619 
620 static __inline__ void __DEFAULT_FN_ATTRS
_mm_storel_pd(double * __dp,__m128d __a)621 _mm_storel_pd(double *__dp, __m128d __a)
622 {
623   struct __mm_storeh_pd_struct {
624     double __u;
625   } __attribute__((__packed__, __may_alias__));
626   ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
627 }
628 
629 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_add_epi8(__m128i __a,__m128i __b)630 _mm_add_epi8(__m128i __a, __m128i __b)
631 {
632   return (__m128i)((__v16qi)__a + (__v16qi)__b);
633 }
634 
635 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_add_epi16(__m128i __a,__m128i __b)636 _mm_add_epi16(__m128i __a, __m128i __b)
637 {
638   return (__m128i)((__v8hi)__a + (__v8hi)__b);
639 }
640 
641 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_add_epi32(__m128i __a,__m128i __b)642 _mm_add_epi32(__m128i __a, __m128i __b)
643 {
644   return (__m128i)((__v4si)__a + (__v4si)__b);
645 }
646 
647 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_add_si64(__m64 __a,__m64 __b)648 _mm_add_si64(__m64 __a, __m64 __b)
649 {
650   return (__m64)__builtin_ia32_paddq(__a, __b);
651 }
652 
653 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_add_epi64(__m128i __a,__m128i __b)654 _mm_add_epi64(__m128i __a, __m128i __b)
655 {
656   return __a + __b;
657 }
658 
659 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_adds_epi8(__m128i __a,__m128i __b)660 _mm_adds_epi8(__m128i __a, __m128i __b)
661 {
662   return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
663 }
664 
665 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_adds_epi16(__m128i __a,__m128i __b)666 _mm_adds_epi16(__m128i __a, __m128i __b)
667 {
668   return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
669 }
670 
671 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_adds_epu8(__m128i __a,__m128i __b)672 _mm_adds_epu8(__m128i __a, __m128i __b)
673 {
674   return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
675 }
676 
677 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_adds_epu16(__m128i __a,__m128i __b)678 _mm_adds_epu16(__m128i __a, __m128i __b)
679 {
680   return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
681 }
682 
683 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_avg_epu8(__m128i __a,__m128i __b)684 _mm_avg_epu8(__m128i __a, __m128i __b)
685 {
686   return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
687 }
688 
689 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_avg_epu16(__m128i __a,__m128i __b)690 _mm_avg_epu16(__m128i __a, __m128i __b)
691 {
692   return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
693 }
694 
695 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_madd_epi16(__m128i __a,__m128i __b)696 _mm_madd_epi16(__m128i __a, __m128i __b)
697 {
698   return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
699 }
700 
701 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_max_epi16(__m128i __a,__m128i __b)702 _mm_max_epi16(__m128i __a, __m128i __b)
703 {
704   return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
705 }
706 
707 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_max_epu8(__m128i __a,__m128i __b)708 _mm_max_epu8(__m128i __a, __m128i __b)
709 {
710   return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
711 }
712 
713 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_min_epi16(__m128i __a,__m128i __b)714 _mm_min_epi16(__m128i __a, __m128i __b)
715 {
716   return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
717 }
718 
719 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_min_epu8(__m128i __a,__m128i __b)720 _mm_min_epu8(__m128i __a, __m128i __b)
721 {
722   return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
723 }
724 
725 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mulhi_epi16(__m128i __a,__m128i __b)726 _mm_mulhi_epi16(__m128i __a, __m128i __b)
727 {
728   return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
729 }
730 
731 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mulhi_epu16(__m128i __a,__m128i __b)732 _mm_mulhi_epu16(__m128i __a, __m128i __b)
733 {
734   return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
735 }
736 
737 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mullo_epi16(__m128i __a,__m128i __b)738 _mm_mullo_epi16(__m128i __a, __m128i __b)
739 {
740   return (__m128i)((__v8hi)__a * (__v8hi)__b);
741 }
742 
743 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_mul_su32(__m64 __a,__m64 __b)744 _mm_mul_su32(__m64 __a, __m64 __b)
745 {
746   return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
747 }
748 
749 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mul_epu32(__m128i __a,__m128i __b)750 _mm_mul_epu32(__m128i __a, __m128i __b)
751 {
752   return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
753 }
754 
755 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sad_epu8(__m128i __a,__m128i __b)756 _mm_sad_epu8(__m128i __a, __m128i __b)
757 {
758   return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
759 }
760 
761 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sub_epi8(__m128i __a,__m128i __b)762 _mm_sub_epi8(__m128i __a, __m128i __b)
763 {
764   return (__m128i)((__v16qi)__a - (__v16qi)__b);
765 }
766 
767 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sub_epi16(__m128i __a,__m128i __b)768 _mm_sub_epi16(__m128i __a, __m128i __b)
769 {
770   return (__m128i)((__v8hi)__a - (__v8hi)__b);
771 }
772 
773 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sub_epi32(__m128i __a,__m128i __b)774 _mm_sub_epi32(__m128i __a, __m128i __b)
775 {
776   return (__m128i)((__v4si)__a - (__v4si)__b);
777 }
778 
779 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sub_si64(__m64 __a,__m64 __b)780 _mm_sub_si64(__m64 __a, __m64 __b)
781 {
782   return (__m64)__builtin_ia32_psubq(__a, __b);
783 }
784 
785 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sub_epi64(__m128i __a,__m128i __b)786 _mm_sub_epi64(__m128i __a, __m128i __b)
787 {
788   return __a - __b;
789 }
790 
791 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_subs_epi8(__m128i __a,__m128i __b)792 _mm_subs_epi8(__m128i __a, __m128i __b)
793 {
794   return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
795 }
796 
797 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_subs_epi16(__m128i __a,__m128i __b)798 _mm_subs_epi16(__m128i __a, __m128i __b)
799 {
800   return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
801 }
802 
803 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_subs_epu8(__m128i __a,__m128i __b)804 _mm_subs_epu8(__m128i __a, __m128i __b)
805 {
806   return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
807 }
808 
809 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_subs_epu16(__m128i __a,__m128i __b)810 _mm_subs_epu16(__m128i __a, __m128i __b)
811 {
812   return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
813 }
814 
815 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_and_si128(__m128i __a,__m128i __b)816 _mm_and_si128(__m128i __a, __m128i __b)
817 {
818   return __a & __b;
819 }
820 
821 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_andnot_si128(__m128i __a,__m128i __b)822 _mm_andnot_si128(__m128i __a, __m128i __b)
823 {
824   return ~__a & __b;
825 }
826 
827 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_or_si128(__m128i __a,__m128i __b)828 _mm_or_si128(__m128i __a, __m128i __b)
829 {
830   return __a | __b;
831 }
832 
833 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_xor_si128(__m128i __a,__m128i __b)834 _mm_xor_si128(__m128i __a, __m128i __b)
835 {
836   return __a ^ __b;
837 }
838 
/* Byte shuffle over the concatenation of a zero vector (indices 0-15) and
 * the bytes of a (indices 16-31).  Result byte k takes index
 * (16 + k) - (imm & 0xF).  If any of bits 4-7 of imm is set, every index is 0
 * instead, i.e. a byte of the zero vector.  imm is used inside the shuffle
 * index list, so it has to be a compile-time constant. */
#define _mm_slli_si128(a, imm) __extension__ ({                             \
  (__m128i)__builtin_shufflevector(                                         \
      (__v16qi)_mm_setzero_si128(),                                         \
      (__v16qi)(__m128i)(a),                                                \
      ((imm) & 0xF0) ? 0 : 16 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 17 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 18 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 19 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 20 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 21 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 22 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 23 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 24 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 25 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 26 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 27 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 28 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 29 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 30 - ((imm) & 0xF),                              \
      ((imm) & 0xF0) ? 0 : 31 - ((imm) & 0xF)); })
858 
/* Alternative spelling of _mm_slli_si128. */
#define _mm_bslli_si128(a, imm) _mm_slli_si128((a), (imm))
861 
862 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_slli_epi16(__m128i __a,int __count)863 _mm_slli_epi16(__m128i __a, int __count)
864 {
865   return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
866 }
867 
868 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sll_epi16(__m128i __a,__m128i __count)869 _mm_sll_epi16(__m128i __a, __m128i __count)
870 {
871   return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
872 }
873 
874 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_slli_epi32(__m128i __a,int __count)875 _mm_slli_epi32(__m128i __a, int __count)
876 {
877   return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
878 }
879 
880 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sll_epi32(__m128i __a,__m128i __count)881 _mm_sll_epi32(__m128i __a, __m128i __count)
882 {
883   return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
884 }
885 
886 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_slli_epi64(__m128i __a,int __count)887 _mm_slli_epi64(__m128i __a, int __count)
888 {
889   return __builtin_ia32_psllqi128(__a, __count);
890 }
891 
892 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sll_epi64(__m128i __a,__m128i __count)893 _mm_sll_epi64(__m128i __a, __m128i __count)
894 {
895   return __builtin_ia32_psllq128(__a, __count);
896 }
897 
898 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srai_epi16(__m128i __a,int __count)899 _mm_srai_epi16(__m128i __a, int __count)
900 {
901   return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
902 }
903 
904 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sra_epi16(__m128i __a,__m128i __count)905 _mm_sra_epi16(__m128i __a, __m128i __count)
906 {
907   return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
908 }
909 
910 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srai_epi32(__m128i __a,int __count)911 _mm_srai_epi32(__m128i __a, int __count)
912 {
913   return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
914 }
915 
916 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sra_epi32(__m128i __a,__m128i __count)917 _mm_sra_epi32(__m128i __a, __m128i __count)
918 {
919   return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
920 }
921 
/* Byte shuffle over the concatenation of the bytes of a (indices 0-15) and a
 * zero vector (indices 16-31).  Result byte k takes index (imm & 0xF) + k, so
 * indices that run past 15 pick zero bytes.  If any of bits 4-7 of imm is
 * set, every index is 16 instead, i.e. a byte of the zero vector.  imm is
 * used inside the shuffle index list, so it has to be a compile-time
 * constant. */
#define _mm_srli_si128(a, imm) __extension__ ({                             \
  (__m128i)__builtin_shufflevector(                                         \
      (__v16qi)(__m128i)(a),                                                \
      (__v16qi)_mm_setzero_si128(),                                         \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 0,                              \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 1,                              \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 2,                              \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 3,                              \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 4,                              \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 5,                              \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 6,                              \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 7,                              \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 8,                              \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 9,                              \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 10,                             \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 11,                             \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 12,                             \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 13,                             \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 14,                             \
      ((imm) & 0xF0) ? 16 : ((imm) & 0xF) + 15); })
941 
/* Alternate spelling: expands to _mm_srli_si128 with the same arguments. */
#define _mm_bsrli_si128(a, imm) _mm_srli_si128((a), (imm))
944 
945 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srli_epi16(__m128i __a,int __count)946 _mm_srli_epi16(__m128i __a, int __count)
947 {
948   return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
949 }
950 
951 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srl_epi16(__m128i __a,__m128i __count)952 _mm_srl_epi16(__m128i __a, __m128i __count)
953 {
954   return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
955 }
956 
957 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srli_epi32(__m128i __a,int __count)958 _mm_srli_epi32(__m128i __a, int __count)
959 {
960   return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
961 }
962 
963 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srl_epi32(__m128i __a,__m128i __count)964 _mm_srl_epi32(__m128i __a, __m128i __count)
965 {
966   return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
967 }
968 
969 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srli_epi64(__m128i __a,int __count)970 _mm_srli_epi64(__m128i __a, int __count)
971 {
972   return __builtin_ia32_psrlqi128(__a, __count);
973 }
974 
975 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srl_epi64(__m128i __a,__m128i __count)976 _mm_srl_epi64(__m128i __a, __m128i __count)
977 {
978   return __builtin_ia32_psrlq128(__a, __count);
979 }
980 
981 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpeq_epi8(__m128i __a,__m128i __b)982 _mm_cmpeq_epi8(__m128i __a, __m128i __b)
983 {
984   return (__m128i)((__v16qi)__a == (__v16qi)__b);
985 }
986 
987 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpeq_epi16(__m128i __a,__m128i __b)988 _mm_cmpeq_epi16(__m128i __a, __m128i __b)
989 {
990   return (__m128i)((__v8hi)__a == (__v8hi)__b);
991 }
992 
993 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpeq_epi32(__m128i __a,__m128i __b)994 _mm_cmpeq_epi32(__m128i __a, __m128i __b)
995 {
996   return (__m128i)((__v4si)__a == (__v4si)__b);
997 }
998 
999 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpgt_epi8(__m128i __a,__m128i __b)1000 _mm_cmpgt_epi8(__m128i __a, __m128i __b)
1001 {
1002   /* This function always performs a signed comparison, but __v16qi is a char
1003      which may be signed or unsigned, so use __v16qs. */
1004   return (__m128i)((__v16qs)__a > (__v16qs)__b);
1005 }
1006 
1007 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpgt_epi16(__m128i __a,__m128i __b)1008 _mm_cmpgt_epi16(__m128i __a, __m128i __b)
1009 {
1010   return (__m128i)((__v8hi)__a > (__v8hi)__b);
1011 }
1012 
1013 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpgt_epi32(__m128i __a,__m128i __b)1014 _mm_cmpgt_epi32(__m128i __a, __m128i __b)
1015 {
1016   return (__m128i)((__v4si)__a > (__v4si)__b);
1017 }
1018 
1019 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmplt_epi8(__m128i __a,__m128i __b)1020 _mm_cmplt_epi8(__m128i __a, __m128i __b)
1021 {
1022   return _mm_cmpgt_epi8(__b, __a);
1023 }
1024 
1025 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmplt_epi16(__m128i __a,__m128i __b)1026 _mm_cmplt_epi16(__m128i __a, __m128i __b)
1027 {
1028   return _mm_cmpgt_epi16(__b, __a);
1029 }
1030 
1031 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmplt_epi32(__m128i __a,__m128i __b)1032 _mm_cmplt_epi32(__m128i __a, __m128i __b)
1033 {
1034   return _mm_cmpgt_epi32(__b, __a);
1035 }
1036 
1037 #ifdef __x86_64__
1038 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtsi64_sd(__m128d __a,long long __b)1039 _mm_cvtsi64_sd(__m128d __a, long long __b)
1040 {
1041   __a[0] = __b;
1042   return __a;
1043 }
1044 
1045 static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvtsd_si64(__m128d __a)1046 _mm_cvtsd_si64(__m128d __a)
1047 {
1048   return __builtin_ia32_cvtsd2si64(__a);
1049 }
1050 
1051 static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttsd_si64(__m128d __a)1052 _mm_cvttsd_si64(__m128d __a)
1053 {
1054   return __a[0];
1055 }
1056 #endif
1057 
1058 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtepi32_ps(__m128i __a)1059 _mm_cvtepi32_ps(__m128i __a)
1060 {
1061   return __builtin_ia32_cvtdq2ps((__v4si)__a);
1062 }
1063 
1064 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtps_epi32(__m128 __a)1065 _mm_cvtps_epi32(__m128 __a)
1066 {
1067   return (__m128i)__builtin_ia32_cvtps2dq(__a);
1068 }
1069 
1070 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvttps_epi32(__m128 __a)1071 _mm_cvttps_epi32(__m128 __a)
1072 {
1073   return (__m128i)__builtin_ia32_cvttps2dq(__a);
1074 }
1075 
1076 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtsi32_si128(int __a)1077 _mm_cvtsi32_si128(int __a)
1078 {
1079   return (__m128i)(__v4si){ __a, 0, 0, 0 };
1080 }
1081 
1082 #ifdef __x86_64__
1083 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtsi64_si128(long long __a)1084 _mm_cvtsi64_si128(long long __a)
1085 {
1086   return (__m128i){ __a, 0 };
1087 }
1088 #endif
1089 
1090 static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvtsi128_si32(__m128i __a)1091 _mm_cvtsi128_si32(__m128i __a)
1092 {
1093   __v4si __b = (__v4si)__a;
1094   return __b[0];
1095 }
1096 
1097 #ifdef __x86_64__
1098 static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvtsi128_si64(__m128i __a)1099 _mm_cvtsi128_si64(__m128i __a)
1100 {
1101   return __a[0];
1102 }
1103 #endif
1104 
1105 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_load_si128(__m128i const * __p)1106 _mm_load_si128(__m128i const *__p)
1107 {
1108   return *__p;
1109 }
1110 
1111 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_loadu_si128(__m128i const * __p)1112 _mm_loadu_si128(__m128i const *__p)
1113 {
1114   struct __loadu_si128 {
1115     __m128i __v;
1116   } __attribute__((__packed__, __may_alias__));
1117   return ((struct __loadu_si128*)__p)->__v;
1118 }
1119 
1120 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_loadl_epi64(__m128i const * __p)1121 _mm_loadl_epi64(__m128i const *__p)
1122 {
1123   struct __mm_loadl_epi64_struct {
1124     long long __u;
1125   } __attribute__((__packed__, __may_alias__));
1126   return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
1127 }
1128 
1129 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_undefined_si128()1130 _mm_undefined_si128()
1131 {
1132   return (__m128i)__builtin_ia32_undef128();
1133 }
1134 
1135 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set_epi64x(long long __q1,long long __q0)1136 _mm_set_epi64x(long long __q1, long long __q0)
1137 {
1138   return (__m128i){ __q0, __q1 };
1139 }
1140 
1141 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set_epi64(__m64 __q1,__m64 __q0)1142 _mm_set_epi64(__m64 __q1, __m64 __q0)
1143 {
1144   return (__m128i){ (long long)__q0, (long long)__q1 };
1145 }
1146 
1147 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set_epi32(int __i3,int __i2,int __i1,int __i0)1148 _mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
1149 {
1150   return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
1151 }
1152 
1153 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set_epi16(short __w7,short __w6,short __w5,short __w4,short __w3,short __w2,short __w1,short __w0)1154 _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
1155 {
1156   return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
1157 }
1158 
1159 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set_epi8(char __b15,char __b14,char __b13,char __b12,char __b11,char __b10,char __b9,char __b8,char __b7,char __b6,char __b5,char __b4,char __b3,char __b2,char __b1,char __b0)1160 _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
1161 {
1162   return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
1163 }
1164 
1165 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set1_epi64x(long long __q)1166 _mm_set1_epi64x(long long __q)
1167 {
1168   return (__m128i){ __q, __q };
1169 }
1170 
1171 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set1_epi64(__m64 __q)1172 _mm_set1_epi64(__m64 __q)
1173 {
1174   return (__m128i){ (long long)__q, (long long)__q };
1175 }
1176 
1177 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set1_epi32(int __i)1178 _mm_set1_epi32(int __i)
1179 {
1180   return (__m128i)(__v4si){ __i, __i, __i, __i };
1181 }
1182 
1183 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set1_epi16(short __w)1184 _mm_set1_epi16(short __w)
1185 {
1186   return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
1187 }
1188 
1189 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set1_epi8(char __b)1190 _mm_set1_epi8(char __b)
1191 {
1192   return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b };
1193 }
1194 
1195 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_setr_epi64(__m64 __q0,__m64 __q1)1196 _mm_setr_epi64(__m64 __q0, __m64 __q1)
1197 {
1198   return (__m128i){ (long long)__q0, (long long)__q1 };
1199 }
1200 
1201 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_setr_epi32(int __i0,int __i1,int __i2,int __i3)1202 _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
1203 {
1204   return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
1205 }
1206 
1207 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_setr_epi16(short __w0,short __w1,short __w2,short __w3,short __w4,short __w5,short __w6,short __w7)1208 _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
1209 {
1210   return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
1211 }
1212 
1213 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_setr_epi8(char __b0,char __b1,char __b2,char __b3,char __b4,char __b5,char __b6,char __b7,char __b8,char __b9,char __b10,char __b11,char __b12,char __b13,char __b14,char __b15)1214 _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
1215 {
1216   return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
1217 }
1218 
1219 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_setzero_si128(void)1220 _mm_setzero_si128(void)
1221 {
1222   return (__m128i){ 0LL, 0LL };
1223 }
1224 
1225 static __inline__ void __DEFAULT_FN_ATTRS
_mm_store_si128(__m128i * __p,__m128i __b)1226 _mm_store_si128(__m128i *__p, __m128i __b)
1227 {
1228   *__p = __b;
1229 }
1230 
1231 static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_si128(__m128i * __p,__m128i __b)1232 _mm_storeu_si128(__m128i *__p, __m128i __b)
1233 {
1234   __builtin_ia32_storedqu((char *)__p, (__v16qi)__b);
1235 }
1236 
1237 static __inline__ void __DEFAULT_FN_ATTRS
_mm_maskmoveu_si128(__m128i __d,__m128i __n,char * __p)1238 _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
1239 {
1240   __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
1241 }
1242 
1243 static __inline__ void __DEFAULT_FN_ATTRS
_mm_storel_epi64(__m128i * __p,__m128i __a)1244 _mm_storel_epi64(__m128i *__p, __m128i __a)
1245 {
1246   struct __mm_storel_epi64_struct {
1247     long long __u;
1248   } __attribute__((__packed__, __may_alias__));
1249   ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
1250 }
1251 
1252 static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_pd(double * __p,__m128d __a)1253 _mm_stream_pd(double *__p, __m128d __a)
1254 {
1255   __builtin_ia32_movntpd(__p, __a);
1256 }
1257 
1258 static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_si128(__m128i * __p,__m128i __a)1259 _mm_stream_si128(__m128i *__p, __m128i __a)
1260 {
1261   __builtin_ia32_movntdq(__p, __a);
1262 }
1263 
1264 static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_si32(int * __p,int __a)1265 _mm_stream_si32(int *__p, int __a)
1266 {
1267   __builtin_ia32_movnti(__p, __a);
1268 }
1269 
1270 #ifdef __x86_64__
1271 static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_si64(long long * __p,long long __a)1272 _mm_stream_si64(long long *__p, long long __a)
1273 {
1274   __builtin_ia32_movnti64(__p, __a);
1275 }
1276 #endif
1277 
1278 static __inline__ void __DEFAULT_FN_ATTRS
_mm_clflush(void const * __p)1279 _mm_clflush(void const *__p)
1280 {
1281   __builtin_ia32_clflush(__p);
1282 }
1283 
1284 static __inline__ void __DEFAULT_FN_ATTRS
_mm_lfence(void)1285 _mm_lfence(void)
1286 {
1287   __builtin_ia32_lfence();
1288 }
1289 
1290 static __inline__ void __DEFAULT_FN_ATTRS
_mm_mfence(void)1291 _mm_mfence(void)
1292 {
1293   __builtin_ia32_mfence();
1294 }
1295 
1296 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_packs_epi16(__m128i __a,__m128i __b)1297 _mm_packs_epi16(__m128i __a, __m128i __b)
1298 {
1299   return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
1300 }
1301 
1302 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_packs_epi32(__m128i __a,__m128i __b)1303 _mm_packs_epi32(__m128i __a, __m128i __b)
1304 {
1305   return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
1306 }
1307 
1308 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_packus_epi16(__m128i __a,__m128i __b)1309 _mm_packus_epi16(__m128i __a, __m128i __b)
1310 {
1311   return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
1312 }
1313 
1314 static __inline__ int __DEFAULT_FN_ATTRS
_mm_extract_epi16(__m128i __a,int __imm)1315 _mm_extract_epi16(__m128i __a, int __imm)
1316 {
1317   __v8hi __b = (__v8hi)__a;
1318   return (unsigned short)__b[__imm & 7];
1319 }
1320 
1321 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_epi16(__m128i __a,int __b,int __imm)1322 _mm_insert_epi16(__m128i __a, int __b, int __imm)
1323 {
1324   __v8hi __c = (__v8hi)__a;
1325   __c[__imm & 7] = __b;
1326   return (__m128i)__c;
1327 }
1328 
1329 static __inline__ int __DEFAULT_FN_ATTRS
_mm_movemask_epi8(__m128i __a)1330 _mm_movemask_epi8(__m128i __a)
1331 {
1332   return __builtin_ia32_pmovmskb128((__v16qi)__a);
1333 }
1334 
/* Rearranges the four __v4si elements of (a): result element k is the
   element of (a) named by the k-th 2-bit field of imm (bits 0-1, 2-3, 4-5,
   6-7).  All indices are below 4, so the zero vector passed as the second
   operand is never selected. */
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v4si)(__m128i)(a), \
      (__v4si)_mm_setzero_si128(), \
      (imm) & 0x3, \
      ((imm) & 0xc) >> 2, \
      ((imm) & 0x30) >> 4, \
      ((imm) & 0xc0) >> 6); })
1340 
/* Rearranges elements 0-3 of (a), viewed as __v8hi, according to the four
   2-bit fields of imm; elements 4-7 are copied through unchanged.  The zero
   vector passed as the second operand is never selected. */
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v8hi)(__m128i)(a), \
      (__v8hi)_mm_setzero_si128(), \
      (imm) & 0x3, \
      ((imm) & 0xc) >> 2, \
      ((imm) & 0x30) >> 4, \
      ((imm) & 0xc0) >> 6, \
      4, 5, 6, 7); })
1347 
/* Copies elements 0-3 of (a), viewed as __v8hi, through unchanged and
   rearranges elements 4-7: each 2-bit field of imm, offset by 4, names the
   source element.  The zero vector passed as the second operand is never
   selected. */
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v8hi)(__m128i)(a), \
      (__v8hi)_mm_setzero_si128(), \
      0, 1, 2, 3, \
      4 + (((imm) & 0x03) >> 0), \
      4 + (((imm) & 0x0c) >> 2), \
      4 + (((imm) & 0x30) >> 4), \
      4 + (((imm) & 0xc0) >> 6)); })
1356 
1357 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_unpackhi_epi8(__m128i __a,__m128i __b)1358 _mm_unpackhi_epi8(__m128i __a, __m128i __b)
1359 {
1360   return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
1361 }
1362 
1363 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_unpackhi_epi16(__m128i __a,__m128i __b)1364 _mm_unpackhi_epi16(__m128i __a, __m128i __b)
1365 {
1366   return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
1367 }
1368 
1369 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_unpackhi_epi32(__m128i __a,__m128i __b)1370 _mm_unpackhi_epi32(__m128i __a, __m128i __b)
1371 {
1372   return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
1373 }
1374 
1375 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_unpackhi_epi64(__m128i __a,__m128i __b)1376 _mm_unpackhi_epi64(__m128i __a, __m128i __b)
1377 {
1378   return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1);
1379 }
1380 
1381 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_unpacklo_epi8(__m128i __a,__m128i __b)1382 _mm_unpacklo_epi8(__m128i __a, __m128i __b)
1383 {
1384   return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
1385 }
1386 
1387 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_unpacklo_epi16(__m128i __a,__m128i __b)1388 _mm_unpacklo_epi16(__m128i __a, __m128i __b)
1389 {
1390   return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
1391 }
1392 
1393 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_unpacklo_epi32(__m128i __a,__m128i __b)1394 _mm_unpacklo_epi32(__m128i __a, __m128i __b)
1395 {
1396   return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
1397 }
1398 
1399 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_unpacklo_epi64(__m128i __a,__m128i __b)1400 _mm_unpacklo_epi64(__m128i __a, __m128i __b)
1401 {
1402   return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0);
1403 }
1404 
1405 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_movepi64_pi64(__m128i __a)1406 _mm_movepi64_pi64(__m128i __a)
1407 {
1408   return (__m64)__a[0];
1409 }
1410 
1411 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_movpi64_epi64(__m64 __a)1412 _mm_movpi64_epi64(__m64 __a)
1413 {
1414   return (__m128i){ (long long)__a, 0 };
1415 }
1416 
1417 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_move_epi64(__m128i __a)1418 _mm_move_epi64(__m128i __a)
1419 {
1420   return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2);
1421 }
1422 
1423 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_unpackhi_pd(__m128d __a,__m128d __b)1424 _mm_unpackhi_pd(__m128d __a, __m128d __b)
1425 {
1426   return __builtin_shufflevector(__a, __b, 1, 2+1);
1427 }
1428 
1429 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_unpacklo_pd(__m128d __a,__m128d __b)1430 _mm_unpacklo_pd(__m128d __a, __m128d __b)
1431 {
1432   return __builtin_shufflevector(__a, __b, 0, 2+0);
1433 }
1434 
1435 static __inline__ int __DEFAULT_FN_ATTRS
_mm_movemask_pd(__m128d __a)1436 _mm_movemask_pd(__m128d __a)
1437 {
1438   return __builtin_ia32_movmskpd(__a);
1439 }
1440 
/* Builds a two-element result: element 0 is the element of (a) chosen by
   bit 0 of i, and element 1 is the element of (b) chosen by bit 1 of i
   (the "+ 2" moves the index into the second operand). */
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  (__m128d)__builtin_shufflevector( \
      (__v2df)(__m128d)(a), \
      (__v2df)(__m128d)(b), \
      (i) & 1, \
      (((i) & 2) >> 1) + 2); })
1444 
1445 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_castpd_ps(__m128d __a)1446 _mm_castpd_ps(__m128d __a)
1447 {
1448   return (__m128)__a;
1449 }
1450 
1451 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_castpd_si128(__m128d __a)1452 _mm_castpd_si128(__m128d __a)
1453 {
1454   return (__m128i)__a;
1455 }
1456 
1457 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_castps_pd(__m128 __a)1458 _mm_castps_pd(__m128 __a)
1459 {
1460   return (__m128d)__a;
1461 }
1462 
1463 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_castps_si128(__m128 __a)1464 _mm_castps_si128(__m128 __a)
1465 {
1466   return (__m128i)__a;
1467 }
1468 
1469 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_castsi128_ps(__m128i __a)1470 _mm_castsi128_ps(__m128i __a)
1471 {
1472   return (__m128)__a;
1473 }
1474 
1475 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_castsi128_pd(__m128i __a)1476 _mm_castsi128_pd(__m128i __a)
1477 {
1478   return (__m128d)__a;
1479 }
1480 
1481 static __inline__ void __DEFAULT_FN_ATTRS
_mm_pause(void)1482 _mm_pause(void)
1483 {
1484   __builtin_ia32_pause();
1485 }
1486 
1487 #undef __DEFAULT_FN_ATTRS
1488 
/* Packs two selector values into one immediate: x goes to bit 1 and y is
   OR-ed into the low bits. */
#define _MM_SHUFFLE2(x, y) \
  (((x) << 1) | (y))
1490 
1491 #endif /* __EMMINTRIN_H */
1492