/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

24 #ifndef __TMMINTRIN_H
25 #define __TMMINTRIN_H
26
27 #include <pmmintrin.h>
28
29 /* Define the default attributes for the functions in this file. */
30 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
31
32 /// \brief Computes the absolute value of each of the packed 8-bit signed
33 /// integers in the source operand and stores the 8-bit unsigned integer
34 /// results in the destination.
35 ///
36 /// \headerfile <x86intrin.h>
37 ///
38 /// This intrinsic corresponds to the \c PABSB instruction.
39 ///
40 /// \param __a
41 /// A 64-bit vector of [8 x i8].
42 /// \returns A 64-bit integer vector containing the absolute values of the
43 /// elements in the operand.
44 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi8(__m64 __a)45 _mm_abs_pi8(__m64 __a)
46 {
47 return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
48 }
49
50 /// \brief Computes the absolute value of each of the packed 8-bit signed
51 /// integers in the source operand and stores the 8-bit unsigned integer
52 /// results in the destination.
53 ///
54 /// \headerfile <x86intrin.h>
55 ///
56 /// This intrinsic corresponds to the \c VPABSB instruction.
57 ///
58 /// \param __a
59 /// A 128-bit vector of [16 x i8].
60 /// \returns A 128-bit integer vector containing the absolute values of the
61 /// elements in the operand.
62 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi8(__m128i __a)63 _mm_abs_epi8(__m128i __a)
64 {
65 return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
66 }
67
68 /// \brief Computes the absolute value of each of the packed 16-bit signed
69 /// integers in the source operand and stores the 16-bit unsigned integer
70 /// results in the destination.
71 ///
72 /// \headerfile <x86intrin.h>
73 ///
74 /// This intrinsic corresponds to the \c PABSW instruction.
75 ///
76 /// \param __a
77 /// A 64-bit vector of [4 x i16].
78 /// \returns A 64-bit integer vector containing the absolute values of the
79 /// elements in the operand.
80 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi16(__m64 __a)81 _mm_abs_pi16(__m64 __a)
82 {
83 return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
84 }
85
86 /// \brief Computes the absolute value of each of the packed 16-bit signed
87 /// integers in the source operand and stores the 16-bit unsigned integer
88 /// results in the destination.
89 ///
90 /// \headerfile <x86intrin.h>
91 ///
92 /// This intrinsic corresponds to the \c VPABSW instruction.
93 ///
94 /// \param __a
95 /// A 128-bit vector of [8 x i16].
96 /// \returns A 128-bit integer vector containing the absolute values of the
97 /// elements in the operand.
98 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi16(__m128i __a)99 _mm_abs_epi16(__m128i __a)
100 {
101 return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
102 }
103
104 /// \brief Computes the absolute value of each of the packed 32-bit signed
105 /// integers in the source operand and stores the 32-bit unsigned integer
106 /// results in the destination.
107 ///
108 /// \headerfile <x86intrin.h>
109 ///
110 /// This intrinsic corresponds to the \c PABSD instruction.
111 ///
112 /// \param __a
113 /// A 64-bit vector of [2 x i32].
114 /// \returns A 64-bit integer vector containing the absolute values of the
115 /// elements in the operand.
116 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi32(__m64 __a)117 _mm_abs_pi32(__m64 __a)
118 {
119 return (__m64)__builtin_ia32_pabsd((__v2si)__a);
120 }
121
122 /// \brief Computes the absolute value of each of the packed 32-bit signed
123 /// integers in the source operand and stores the 32-bit unsigned integer
124 /// results in the destination.
125 ///
126 /// \headerfile <x86intrin.h>
127 ///
128 /// This intrinsic corresponds to the \c VPABSD instruction.
129 ///
130 /// \param __a
131 /// A 128-bit vector of [4 x i32].
132 /// \returns A 128-bit integer vector containing the absolute values of the
133 /// elements in the operand.
134 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi32(__m128i __a)135 _mm_abs_epi32(__m128i __a)
136 {
137 return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
138 }
139
/// \brief Concatenates the two 128-bit integer vector operands, and
///    right-shifts the result by the number of bytes specified in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
/* Implemented as a macro; each argument is parenthesized and converted to the
 * vector type the builtin expects. A plain parenthesized expression is used
 * instead of a GNU statement expression; it yields the same value. */
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))

/// \brief Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
/* Implemented as a macro; each argument is parenthesized and converted to the
 * vector type the builtin expects. A plain parenthesized expression is used
 * instead of a GNU statement expression; it yields the same value. */
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))

186 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
187 /// 128-bit vectors of [8 x i16].
188 ///
189 /// \headerfile <x86intrin.h>
190 ///
191 /// This intrinsic corresponds to the \c VPHADDW instruction.
192 ///
193 /// \param __a
194 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
195 /// horizontal sums of the values are stored in the lower bits of the
196 /// destination.
197 /// \param __b
198 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
199 /// horizontal sums of the values are stored in the upper bits of the
200 /// destination.
201 /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
202 /// both operands.
203 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi16(__m128i __a,__m128i __b)204 _mm_hadd_epi16(__m128i __a, __m128i __b)
205 {
206 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
207 }
208
209 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
210 /// 128-bit vectors of [4 x i32].
211 ///
212 /// \headerfile <x86intrin.h>
213 ///
214 /// This intrinsic corresponds to the \c VPHADDD instruction.
215 ///
216 /// \param __a
217 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
218 /// horizontal sums of the values are stored in the lower bits of the
219 /// destination.
220 /// \param __b
221 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
222 /// horizontal sums of the values are stored in the upper bits of the
223 /// destination.
224 /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
225 /// both operands.
226 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi32(__m128i __a,__m128i __b)227 _mm_hadd_epi32(__m128i __a, __m128i __b)
228 {
229 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
230 }
231
232 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
233 /// 64-bit vectors of [4 x i16].
234 ///
235 /// \headerfile <x86intrin.h>
236 ///
237 /// This intrinsic corresponds to the \c PHADDW instruction.
238 ///
239 /// \param __a
240 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
241 /// horizontal sums of the values are stored in the lower bits of the
242 /// destination.
243 /// \param __b
244 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
245 /// horizontal sums of the values are stored in the upper bits of the
246 /// destination.
247 /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
248 /// operands.
249 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hadd_pi16(__m64 __a,__m64 __b)250 _mm_hadd_pi16(__m64 __a, __m64 __b)
251 {
252 return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
253 }
254
255 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
256 /// 64-bit vectors of [2 x i32].
257 ///
258 /// \headerfile <x86intrin.h>
259 ///
260 /// This intrinsic corresponds to the \c PHADDD instruction.
261 ///
262 /// \param __a
263 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
264 /// horizontal sums of the values are stored in the lower bits of the
265 /// destination.
266 /// \param __b
267 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
268 /// horizontal sums of the values are stored in the upper bits of the
269 /// destination.
270 /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
271 /// operands.
272 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hadd_pi32(__m64 __a,__m64 __b)273 _mm_hadd_pi32(__m64 __a, __m64 __b)
274 {
275 return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
276 }
277
278 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
279 /// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
280 /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
281 ///
282 /// \headerfile <x86intrin.h>
283 ///
284 /// This intrinsic corresponds to the \c VPHADDSW instruction.
285 ///
286 /// \param __a
287 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
288 /// horizontal sums of the values are stored in the lower bits of the
289 /// destination.
290 /// \param __b
291 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
292 /// horizontal sums of the values are stored in the upper bits of the
293 /// destination.
294 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
295 /// sums of both operands.
296 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadds_epi16(__m128i __a,__m128i __b)297 _mm_hadds_epi16(__m128i __a, __m128i __b)
298 {
299 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
300 }
301
302 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
303 /// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
304 /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
305 ///
306 /// \headerfile <x86intrin.h>
307 ///
308 /// This intrinsic corresponds to the \c PHADDSW instruction.
309 ///
310 /// \param __a
311 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
312 /// horizontal sums of the values are stored in the lower bits of the
313 /// destination.
314 /// \param __b
315 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
316 /// horizontal sums of the values are stored in the upper bits of the
317 /// destination.
318 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
319 /// sums of both operands.
320 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hadds_pi16(__m64 __a,__m64 __b)321 _mm_hadds_pi16(__m64 __a, __m64 __b)
322 {
323 return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
324 }
325
326 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
327 /// packed 128-bit vectors of [8 x i16].
328 ///
329 /// \headerfile <x86intrin.h>
330 ///
331 /// This intrinsic corresponds to the \c VPHSUBW instruction.
332 ///
333 /// \param __a
334 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
335 /// horizontal differences between the values are stored in the lower bits of
336 /// the destination.
337 /// \param __b
338 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
339 /// horizontal differences between the values are stored in the upper bits of
340 /// the destination.
341 /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
342 /// of both operands.
343 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsub_epi16(__m128i __a,__m128i __b)344 _mm_hsub_epi16(__m128i __a, __m128i __b)
345 {
346 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
347 }
348
349 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
350 /// packed 128-bit vectors of [4 x i32].
351 ///
352 /// \headerfile <x86intrin.h>
353 ///
354 /// This intrinsic corresponds to the \c VPHSUBD instruction.
355 ///
356 /// \param __a
357 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
358 /// horizontal differences between the values are stored in the lower bits of
359 /// the destination.
360 /// \param __b
361 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
362 /// horizontal differences between the values are stored in the upper bits of
363 /// the destination.
364 /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
365 /// of both operands.
366 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsub_epi32(__m128i __a,__m128i __b)367 _mm_hsub_epi32(__m128i __a, __m128i __b)
368 {
369 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
370 }
371
372 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
373 /// packed 64-bit vectors of [4 x i16].
374 ///
375 /// \headerfile <x86intrin.h>
376 ///
377 /// This intrinsic corresponds to the \c PHSUBW instruction.
378 ///
379 /// \param __a
380 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
381 /// horizontal differences between the values are stored in the lower bits of
382 /// the destination.
383 /// \param __b
384 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
385 /// horizontal differences between the values are stored in the upper bits of
386 /// the destination.
387 /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
388 /// of both operands.
389 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hsub_pi16(__m64 __a,__m64 __b)390 _mm_hsub_pi16(__m64 __a, __m64 __b)
391 {
392 return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
393 }
394
395 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
396 /// packed 64-bit vectors of [2 x i32].
397 ///
398 /// \headerfile <x86intrin.h>
399 ///
400 /// This intrinsic corresponds to the \c PHSUBD instruction.
401 ///
402 /// \param __a
403 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
404 /// horizontal differences between the values are stored in the lower bits of
405 /// the destination.
406 /// \param __b
407 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
408 /// horizontal differences between the values are stored in the upper bits of
409 /// the destination.
410 /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
411 /// of both operands.
412 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hsub_pi32(__m64 __a,__m64 __b)413 _mm_hsub_pi32(__m64 __a, __m64 __b)
414 {
415 return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
416 }
417
418 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
419 /// packed 128-bit vectors of [8 x i16]. Positive differences greater than
420 /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
421 /// saturated to 8000h.
422 ///
423 /// \headerfile <x86intrin.h>
424 ///
425 /// This intrinsic corresponds to the \c VPHSUBSW instruction.
426 ///
427 /// \param __a
428 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
429 /// horizontal differences between the values are stored in the lower bits of
430 /// the destination.
431 /// \param __b
432 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
433 /// horizontal differences between the values are stored in the upper bits of
434 /// the destination.
435 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
436 /// differences of both operands.
437 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubs_epi16(__m128i __a,__m128i __b)438 _mm_hsubs_epi16(__m128i __a, __m128i __b)
439 {
440 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
441 }
442
443 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
444 /// packed 64-bit vectors of [4 x i16]. Positive differences greater than
445 /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
446 /// saturated to 8000h.
447 ///
448 /// \headerfile <x86intrin.h>
449 ///
450 /// This intrinsic corresponds to the \c PHSUBSW instruction.
451 ///
452 /// \param __a
453 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
454 /// horizontal differences between the values are stored in the lower bits of
455 /// the destination.
456 /// \param __b
457 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
458 /// horizontal differences between the values are stored in the upper bits of
459 /// the destination.
460 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
461 /// differences of both operands.
462 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hsubs_pi16(__m64 __a,__m64 __b)463 _mm_hsubs_pi16(__m64 __a, __m64 __b)
464 {
465 return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
466 }
467
468 /// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
469 /// values contained in the first source operand and packed 8-bit signed
470 /// integer values contained in the second source operand, adds pairs of
471 /// contiguous products with signed saturation, and writes the 16-bit sums to
472 /// the corresponding bits in the destination. For example, bits [7:0] of
473 /// both operands are multiplied, bits [15:8] of both operands are
474 /// multiplied, and the sum of both results is written to bits [15:0] of the
475 /// destination.
476 ///
477 /// \headerfile <x86intrin.h>
478 ///
479 /// This intrinsic corresponds to the \c VPMADDUBSW instruction.
480 ///
481 /// \param __a
482 /// A 128-bit integer vector containing the first source operand.
483 /// \param __b
484 /// A 128-bit integer vector containing the second source operand.
485 /// \returns A 128-bit integer vector containing the sums of products of both
486 /// operands:
487 /// R0 := (__a0 * __b0) + (__a1 * __b1)
488 /// R1 := (__a2 * __b2) + (__a3 * __b3)
489 /// R2 := (__a4 * __b4) + (__a5 * __b5)
490 /// R3 := (__a6 * __b6) + (__a7 * __b7)
491 /// R4 := (__a8 * __b8) + (__a9 * __b9)
492 /// R5 := (__a10 * __b10) + (__a11 * __b11)
493 /// R6 := (__a12 * __b12) + (__a13 * __b13)
494 /// R7 := (__a14 * __b14) + (__a15 * __b15)
495 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddubs_epi16(__m128i __a,__m128i __b)496 _mm_maddubs_epi16(__m128i __a, __m128i __b)
497 {
498 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
499 }
500
501 /// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
502 /// values contained in the first source operand and packed 8-bit signed
503 /// integer values contained in the second source operand, adds pairs of
504 /// contiguous products with signed saturation, and writes the 16-bit sums to
505 /// the corresponding bits in the destination. For example, bits [7:0] of
506 /// both operands are multiplied, bits [15:8] of both operands are
507 /// multiplied, and the sum of both results is written to bits [15:0] of the
508 /// destination.
509 ///
510 /// \headerfile <x86intrin.h>
511 ///
512 /// This intrinsic corresponds to the \c PMADDUBSW instruction.
513 ///
514 /// \param __a
515 /// A 64-bit integer vector containing the first source operand.
516 /// \param __b
517 /// A 64-bit integer vector containing the second source operand.
518 /// \returns A 64-bit integer vector containing the sums of products of both
519 /// operands:
520 /// R0 := (__a0 * __b0) + (__a1 * __b1)
521 /// R1 := (__a2 * __b2) + (__a3 * __b3)
522 /// R2 := (__a4 * __b4) + (__a5 * __b5)
523 /// R3 := (__a6 * __b6) + (__a7 * __b7)
524 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_maddubs_pi16(__m64 __a,__m64 __b)525 _mm_maddubs_pi16(__m64 __a, __m64 __b)
526 {
527 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
528 }
529
530 /// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
531 /// products to the 18 most significant bits by right-shifting, rounds the
532 /// truncated value by adding 1, and writes bits [16:1] to the destination.
533 ///
534 /// \headerfile <x86intrin.h>
535 ///
536 /// This intrinsic corresponds to the \c VPMULHRSW instruction.
537 ///
538 /// \param __a
539 /// A 128-bit vector of [8 x i16] containing one of the source operands.
540 /// \param __b
541 /// A 128-bit vector of [8 x i16] containing one of the source operands.
542 /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
543 /// products of both operands.
544 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mulhrs_epi16(__m128i __a,__m128i __b)545 _mm_mulhrs_epi16(__m128i __a, __m128i __b)
546 {
547 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
548 }
549
550 /// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
551 /// products to the 18 most significant bits by right-shifting, rounds the
552 /// truncated value by adding 1, and writes bits [16:1] to the destination.
553 ///
554 /// \headerfile <x86intrin.h>
555 ///
556 /// This intrinsic corresponds to the \c PMULHRSW instruction.
557 ///
558 /// \param __a
559 /// A 64-bit vector of [4 x i16] containing one of the source operands.
560 /// \param __b
561 /// A 64-bit vector of [4 x i16] containing one of the source operands.
562 /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
563 /// products of both operands.
564 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_mulhrs_pi16(__m64 __a,__m64 __b)565 _mm_mulhrs_pi16(__m64 __a, __m64 __b)
566 {
567 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
568 }
569
570 /// \brief Copies the 8-bit integers from a 128-bit integer vector to the
571 /// destination or clears 8-bit values in the destination, as specified by
572 /// the second source operand.
573 ///
574 /// \headerfile <x86intrin.h>
575 ///
576 /// This intrinsic corresponds to the \c VPSHUFB instruction.
577 ///
578 /// \param __a
579 /// A 128-bit integer vector containing the values to be copied.
580 /// \param __b
581 /// A 128-bit integer vector containing control bytes corresponding to
582 /// positions in the destination:
583 /// Bit 7:
584 /// 1: Clear the corresponding byte in the destination.
585 /// 0: Copy the selected source byte to the corresponding byte in the
586 /// destination.
587 /// Bits [6:4] Reserved.
588 /// Bits [3:0] select the source byte to be copied.
589 /// \returns A 128-bit integer vector containing the copied or cleared values.
590 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shuffle_epi8(__m128i __a,__m128i __b)591 _mm_shuffle_epi8(__m128i __a, __m128i __b)
592 {
593 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
594 }
595
596 /// \brief Copies the 8-bit integers from a 64-bit integer vector to the
597 /// destination or clears 8-bit values in the destination, as specified by
598 /// the second source operand.
599 ///
600 /// \headerfile <x86intrin.h>
601 ///
602 /// This intrinsic corresponds to the \c PSHUFB instruction.
603 ///
604 /// \param __a
605 /// A 64-bit integer vector containing the values to be copied.
606 /// \param __b
607 /// A 64-bit integer vector containing control bytes corresponding to
608 /// positions in the destination:
609 /// Bit 7:
610 /// 1: Clear the corresponding byte in the destination.
611 /// 0: Copy the selected source byte to the corresponding byte in the
612 /// destination.
613 /// Bits [3:0] select the source byte to be copied.
614 /// \returns A 64-bit integer vector containing the copied or cleared values.
615 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_shuffle_pi8(__m64 __a,__m64 __b)616 _mm_shuffle_pi8(__m64 __a, __m64 __b)
617 {
618 return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
619 }
620
621 /// \brief For each 8-bit integer in the first source operand, perform one of
622 /// the following actions as specified by the second source operand: If the
623 /// byte in the second source is negative, calculate the two's complement of
624 /// the corresponding byte in the first source, and write that value to the
625 /// destination. If the byte in the second source is positive, copy the
626 /// corresponding byte from the first source to the destination. If the byte
627 /// in the second source is zero, clear the corresponding byte in the
628 /// destination.
629 ///
630 /// \headerfile <x86intrin.h>
631 ///
632 /// This intrinsic corresponds to the \c VPSIGNB instruction.
633 ///
634 /// \param __a
635 /// A 128-bit integer vector containing the values to be copied.
636 /// \param __b
637 /// A 128-bit integer vector containing control bytes corresponding to
638 /// positions in the destination.
639 /// \returns A 128-bit integer vector containing the resultant values.
640 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi8(__m128i __a,__m128i __b)641 _mm_sign_epi8(__m128i __a, __m128i __b)
642 {
643 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
644 }
645
646 /// \brief For each 16-bit integer in the first source operand, perform one of
647 /// the following actions as specified by the second source operand: If the
648 /// word in the second source is negative, calculate the two's complement of
649 /// the corresponding word in the first source, and write that value to the
650 /// destination. If the word in the second source is positive, copy the
651 /// corresponding word from the first source to the destination. If the word
652 /// in the second source is zero, clear the corresponding word in the
653 /// destination.
654 ///
655 /// \headerfile <x86intrin.h>
656 ///
657 /// This intrinsic corresponds to the \c VPSIGNW instruction.
658 ///
659 /// \param __a
660 /// A 128-bit integer vector containing the values to be copied.
661 /// \param __b
662 /// A 128-bit integer vector containing control words corresponding to
663 /// positions in the destination.
664 /// \returns A 128-bit integer vector containing the resultant values.
665 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi16(__m128i __a,__m128i __b)666 _mm_sign_epi16(__m128i __a, __m128i __b)
667 {
668 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
669 }
670
671 /// \brief For each 32-bit integer in the first source operand, perform one of
672 /// the following actions as specified by the second source operand: If the
673 /// doubleword in the second source is negative, calculate the two's
674 /// complement of the corresponding word in the first source, and write that
675 /// value to the destination. If the doubleword in the second source is
676 /// positive, copy the corresponding word from the first source to the
677 /// destination. If the doubleword in the second source is zero, clear the
678 /// corresponding word in the destination.
679 ///
680 /// \headerfile <x86intrin.h>
681 ///
682 /// This intrinsic corresponds to the \c VPSIGND instruction.
683 ///
684 /// \param __a
685 /// A 128-bit integer vector containing the values to be copied.
686 /// \param __b
687 /// A 128-bit integer vector containing control doublewords corresponding to
688 /// positions in the destination.
689 /// \returns A 128-bit integer vector containing the resultant values.
690 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi32(__m128i __a,__m128i __b)691 _mm_sign_epi32(__m128i __a, __m128i __b)
692 {
693 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
694 }
695
696 /// \brief For each 8-bit integer in the first source operand, perform one of
697 /// the following actions as specified by the second source operand: If the
698 /// byte in the second source is negative, calculate the two's complement of
699 /// the corresponding byte in the first source, and write that value to the
700 /// destination. If the byte in the second source is positive, copy the
701 /// corresponding byte from the first source to the destination. If the byte
702 /// in the second source is zero, clear the corresponding byte in the
703 /// destination.
704 ///
705 /// \headerfile <x86intrin.h>
706 ///
707 /// This intrinsic corresponds to the \c PSIGNB instruction.
708 ///
709 /// \param __a
710 /// A 64-bit integer vector containing the values to be copied.
711 /// \param __b
712 /// A 64-bit integer vector containing control bytes corresponding to
713 /// positions in the destination.
714 /// \returns A 64-bit integer vector containing the resultant values.
715 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi8(__m64 __a,__m64 __b)716 _mm_sign_pi8(__m64 __a, __m64 __b)
717 {
718 return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
719 }
720
721 /// \brief For each 16-bit integer in the first source operand, perform one of
722 /// the following actions as specified by the second source operand: If the
723 /// word in the second source is negative, calculate the two's complement of
724 /// the corresponding word in the first source, and write that value to the
725 /// destination. If the word in the second source is positive, copy the
726 /// corresponding word from the first source to the destination. If the word
727 /// in the second source is zero, clear the corresponding word in the
728 /// destination.
729 ///
730 /// \headerfile <x86intrin.h>
731 ///
732 /// This intrinsic corresponds to the \c PSIGNW instruction.
733 ///
734 /// \param __a
735 /// A 64-bit integer vector containing the values to be copied.
736 /// \param __b
737 /// A 64-bit integer vector containing control words corresponding to
738 /// positions in the destination.
739 /// \returns A 64-bit integer vector containing the resultant values.
740 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi16(__m64 __a,__m64 __b)741 _mm_sign_pi16(__m64 __a, __m64 __b)
742 {
743 return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
744 }
745
746 /// \brief For each 32-bit integer in the first source operand, perform one of
747 /// the following actions as specified by the second source operand: If the
748 /// doubleword in the second source is negative, calculate the two's
749 /// complement of the corresponding doubleword in the first source, and
750 /// write that value to the destination. If the doubleword in the second
751 /// source is positive, copy the corresponding doubleword from the first
752 /// source to the destination. If the doubleword in the second source is
753 /// zero, clear the corresponding doubleword in the destination.
754 ///
755 /// \headerfile <x86intrin.h>
756 ///
757 /// This intrinsic corresponds to the \c PSIGND instruction.
758 ///
759 /// \param __a
760 /// A 64-bit integer vector containing the values to be copied.
761 /// \param __b
762 /// A 64-bit integer vector containing two control doublewords corresponding
763 /// to positions in the destination.
764 /// \returns A 64-bit integer vector containing the resultant values.
765 static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi32(__m64 __a,__m64 __b)766 _mm_sign_pi32(__m64 __a, __m64 __b)
767 {
768 return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
769 }
770
771 #undef __DEFAULT_FN_ATTRS
772
773 #endif /* __TMMINTRIN_H */
774