1 /*===---- xopintrin.h - XOP intrinsics -------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24 #ifndef __X86INTRIN_H
25 #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
26 #endif
27
28 #ifndef __XOPINTRIN_H
29 #define __XOPINTRIN_H
30
31 #include <fma4intrin.h>
32
/* Define the default attributes for the functions in this file: every function
 * declared with this macro is tagged __always_inline__ and __nodebug__, and is
 * compiled with the "xop" target feature via __target__. The macro is
 * #undef'd again at the end of this header. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop")))
35
36 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi16(__m128i __A,__m128i __B,__m128i __C)37 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
38 {
39 return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
40 }
41
42 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macc_epi16(__m128i __A,__m128i __B,__m128i __C)43 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
44 {
45 return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
46 }
47
48 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccsd_epi16(__m128i __A,__m128i __B,__m128i __C)49 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
50 {
51 return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
52 }
53
54 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccd_epi16(__m128i __A,__m128i __B,__m128i __C)55 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
56 {
57 return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
58 }
59
60 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi32(__m128i __A,__m128i __B,__m128i __C)61 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
62 {
63 return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
64 }
65
66 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macc_epi32(__m128i __A,__m128i __B,__m128i __C)67 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
68 {
69 return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
70 }
71
72 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccslo_epi32(__m128i __A,__m128i __B,__m128i __C)73 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
74 {
75 return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
76 }
77
78 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macclo_epi32(__m128i __A,__m128i __B,__m128i __C)79 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
80 {
81 return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
82 }
83
84 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccshi_epi32(__m128i __A,__m128i __B,__m128i __C)85 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
86 {
87 return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
88 }
89
90 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macchi_epi32(__m128i __A,__m128i __B,__m128i __C)91 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
92 {
93 return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
94 }
95
96 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddsd_epi16(__m128i __A,__m128i __B,__m128i __C)97 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
98 {
99 return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
100 }
101
102 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddd_epi16(__m128i __A,__m128i __B,__m128i __C)103 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
104 {
105 return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
106 }
107
108 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddw_epi8(__m128i __A)109 _mm_haddw_epi8(__m128i __A)
110 {
111 return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
112 }
113
114 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epi8(__m128i __A)115 _mm_haddd_epi8(__m128i __A)
116 {
117 return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
118 }
119
120 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi8(__m128i __A)121 _mm_haddq_epi8(__m128i __A)
122 {
123 return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
124 }
125
126 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epi16(__m128i __A)127 _mm_haddd_epi16(__m128i __A)
128 {
129 return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
130 }
131
132 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi16(__m128i __A)133 _mm_haddq_epi16(__m128i __A)
134 {
135 return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
136 }
137
138 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi32(__m128i __A)139 _mm_haddq_epi32(__m128i __A)
140 {
141 return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
142 }
143
144 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddw_epu8(__m128i __A)145 _mm_haddw_epu8(__m128i __A)
146 {
147 return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
148 }
149
150 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epu8(__m128i __A)151 _mm_haddd_epu8(__m128i __A)
152 {
153 return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
154 }
155
156 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu8(__m128i __A)157 _mm_haddq_epu8(__m128i __A)
158 {
159 return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
160 }
161
162 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epu16(__m128i __A)163 _mm_haddd_epu16(__m128i __A)
164 {
165 return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
166 }
167
168 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu16(__m128i __A)169 _mm_haddq_epu16(__m128i __A)
170 {
171 return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
172 }
173
174 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu32(__m128i __A)175 _mm_haddq_epu32(__m128i __A)
176 {
177 return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
178 }
179
180 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubw_epi8(__m128i __A)181 _mm_hsubw_epi8(__m128i __A)
182 {
183 return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
184 }
185
186 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubd_epi16(__m128i __A)187 _mm_hsubd_epi16(__m128i __A)
188 {
189 return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
190 }
191
192 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubq_epi32(__m128i __A)193 _mm_hsubq_epi32(__m128i __A)
194 {
195 return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
196 }
197
198 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmov_si128(__m128i __A,__m128i __B,__m128i __C)199 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
200 {
201 return (__m128i)__builtin_ia32_vpcmov((__v2di)__A, (__v2di)__B, (__v2di)__C);
202 }
203
204 static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cmov_si256(__m256i __A,__m256i __B,__m256i __C)205 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
206 {
207 return (__m256i)__builtin_ia32_vpcmov_256((__v4di)__A, (__v4di)__B, (__v4di)__C);
208 }
209
210 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_perm_epi8(__m128i __A,__m128i __B,__m128i __C)211 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
212 {
213 return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
214 }
215
216 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi8(__m128i __A,__m128i __B)217 _mm_rot_epi8(__m128i __A, __m128i __B)
218 {
219 return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
220 }
221
222 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi16(__m128i __A,__m128i __B)223 _mm_rot_epi16(__m128i __A, __m128i __B)
224 {
225 return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
226 }
227
228 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi32(__m128i __A,__m128i __B)229 _mm_rot_epi32(__m128i __A, __m128i __B)
230 {
231 return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
232 }
233
234 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi64(__m128i __A,__m128i __B)235 _mm_rot_epi64(__m128i __A, __m128i __B)
236 {
237 return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
238 }
239
/* Expands to __builtin_ia32_vprotbi applied to A (converted to __m128i, then
 * viewed as __v16qi) and N; the result is cast to __m128i. */
#define _mm_roti_epi8(A, N) \
  ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)))
242
/* Expands to __builtin_ia32_vprotwi applied to A (converted to __m128i, then
 * viewed as __v8hi) and N; the result is cast to __m128i. */
#define _mm_roti_epi16(A, N) \
  ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)))
245
/* Expands to __builtin_ia32_vprotdi applied to A (converted to __m128i, then
 * viewed as __v4si) and N; the result is cast to __m128i. */
#define _mm_roti_epi32(A, N) \
  ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)))
248
/* Expands to __builtin_ia32_vprotqi applied to A (converted to __m128i, then
 * viewed as __v2di) and N; the result is cast to __m128i. */
#define _mm_roti_epi64(A, N) \
  ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)))
251
252 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A,__m128i __B)253 _mm_shl_epi8(__m128i __A, __m128i __B)
254 {
255 return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
256 }
257
258 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi16(__m128i __A,__m128i __B)259 _mm_shl_epi16(__m128i __A, __m128i __B)
260 {
261 return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
262 }
263
264 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi32(__m128i __A,__m128i __B)265 _mm_shl_epi32(__m128i __A, __m128i __B)
266 {
267 return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
268 }
269
270 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi64(__m128i __A,__m128i __B)271 _mm_shl_epi64(__m128i __A, __m128i __B)
272 {
273 return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
274 }
275
276 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi8(__m128i __A,__m128i __B)277 _mm_sha_epi8(__m128i __A, __m128i __B)
278 {
279 return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
280 }
281
282 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi16(__m128i __A,__m128i __B)283 _mm_sha_epi16(__m128i __A, __m128i __B)
284 {
285 return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
286 }
287
288 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi32(__m128i __A,__m128i __B)289 _mm_sha_epi32(__m128i __A, __m128i __B)
290 {
291 return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
292 }
293
294 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi64(__m128i __A,__m128i __B)295 _mm_sha_epi64(__m128i __A, __m128i __B)
296 {
297 return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
298 }
299
/* Expands to __builtin_ia32_vpcomub on A and B (each converted to __m128i,
 * then viewed as __v16qi) with N as the third argument; yields __m128i. */
#define _mm_com_epu8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
                                   (__v16qi)(__m128i)(B), (N)))
303
/* Expands to __builtin_ia32_vpcomuw on A and B (each converted to __m128i,
 * then viewed as __v8hi) with N as the third argument; yields __m128i. */
#define _mm_com_epu16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
                                   (__v8hi)(__m128i)(B), (N)))
307
/* Expands to __builtin_ia32_vpcomud on A and B (each converted to __m128i,
 * then viewed as __v4si) with N as the third argument; yields __m128i. */
#define _mm_com_epu32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
                                   (__v4si)(__m128i)(B), (N)))
311
/* Expands to __builtin_ia32_vpcomuq on A and B (each converted to __m128i,
 * then viewed as __v2di) with N as the third argument; yields __m128i. */
#define _mm_com_epu64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
                                   (__v2di)(__m128i)(B), (N)))
315
/* Expands to __builtin_ia32_vpcomb on A and B (each converted to __m128i,
 * then viewed as __v16qi) with N as the third argument; yields __m128i. */
#define _mm_com_epi8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
                                  (__v16qi)(__m128i)(B), (N)))
319
/* Expands to __builtin_ia32_vpcomw on A and B (each converted to __m128i,
 * then viewed as __v8hi) with N as the third argument; yields __m128i. */
#define _mm_com_epi16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
                                  (__v8hi)(__m128i)(B), (N)))
323
/* Expands to __builtin_ia32_vpcomd on A and B (each converted to __m128i,
 * then viewed as __v4si) with N as the third argument; yields __m128i. */
#define _mm_com_epi32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
                                  (__v4si)(__m128i)(B), (N)))
327
/* Expands to __builtin_ia32_vpcomq on A and B (each converted to __m128i,
 * then viewed as __v2di) with N as the third argument; yields __m128i. */
#define _mm_com_epi64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
                                  (__v2di)(__m128i)(B), (N)))
331
/* Selector values for the last (N) argument of the _mm_com_epu* / _mm_com_epi*
 * macros. The _mm_com<pred>_* wrapper functions in this file pass exactly one
 * of these constants each. */
#define _MM_PCOMCTRL_LT 0
#define _MM_PCOMCTRL_LE 1
#define _MM_PCOMCTRL_GT 2
#define _MM_PCOMCTRL_GE 3
#define _MM_PCOMCTRL_EQ 4
#define _MM_PCOMCTRL_NEQ 5
#define _MM_PCOMCTRL_FALSE 6
#define _MM_PCOMCTRL_TRUE 7
340
341 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu8(__m128i __A,__m128i __B)342 _mm_comlt_epu8(__m128i __A, __m128i __B)
343 {
344 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
345 }
346
347 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu8(__m128i __A,__m128i __B)348 _mm_comle_epu8(__m128i __A, __m128i __B)
349 {
350 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
351 }
352
353 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu8(__m128i __A,__m128i __B)354 _mm_comgt_epu8(__m128i __A, __m128i __B)
355 {
356 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
357 }
358
359 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu8(__m128i __A,__m128i __B)360 _mm_comge_epu8(__m128i __A, __m128i __B)
361 {
362 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
363 }
364
365 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu8(__m128i __A,__m128i __B)366 _mm_comeq_epu8(__m128i __A, __m128i __B)
367 {
368 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
369 }
370
371 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu8(__m128i __A,__m128i __B)372 _mm_comneq_epu8(__m128i __A, __m128i __B)
373 {
374 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
375 }
376
377 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu8(__m128i __A,__m128i __B)378 _mm_comfalse_epu8(__m128i __A, __m128i __B)
379 {
380 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
381 }
382
383 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu8(__m128i __A,__m128i __B)384 _mm_comtrue_epu8(__m128i __A, __m128i __B)
385 {
386 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
387 }
388
389 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu16(__m128i __A,__m128i __B)390 _mm_comlt_epu16(__m128i __A, __m128i __B)
391 {
392 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
393 }
394
395 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu16(__m128i __A,__m128i __B)396 _mm_comle_epu16(__m128i __A, __m128i __B)
397 {
398 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
399 }
400
401 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu16(__m128i __A,__m128i __B)402 _mm_comgt_epu16(__m128i __A, __m128i __B)
403 {
404 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
405 }
406
407 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu16(__m128i __A,__m128i __B)408 _mm_comge_epu16(__m128i __A, __m128i __B)
409 {
410 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
411 }
412
413 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu16(__m128i __A,__m128i __B)414 _mm_comeq_epu16(__m128i __A, __m128i __B)
415 {
416 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
417 }
418
419 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu16(__m128i __A,__m128i __B)420 _mm_comneq_epu16(__m128i __A, __m128i __B)
421 {
422 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
423 }
424
425 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu16(__m128i __A,__m128i __B)426 _mm_comfalse_epu16(__m128i __A, __m128i __B)
427 {
428 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
429 }
430
431 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu16(__m128i __A,__m128i __B)432 _mm_comtrue_epu16(__m128i __A, __m128i __B)
433 {
434 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
435 }
436
437 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu32(__m128i __A,__m128i __B)438 _mm_comlt_epu32(__m128i __A, __m128i __B)
439 {
440 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
441 }
442
443 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu32(__m128i __A,__m128i __B)444 _mm_comle_epu32(__m128i __A, __m128i __B)
445 {
446 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
447 }
448
449 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu32(__m128i __A,__m128i __B)450 _mm_comgt_epu32(__m128i __A, __m128i __B)
451 {
452 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
453 }
454
455 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu32(__m128i __A,__m128i __B)456 _mm_comge_epu32(__m128i __A, __m128i __B)
457 {
458 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
459 }
460
461 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu32(__m128i __A,__m128i __B)462 _mm_comeq_epu32(__m128i __A, __m128i __B)
463 {
464 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
465 }
466
467 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu32(__m128i __A,__m128i __B)468 _mm_comneq_epu32(__m128i __A, __m128i __B)
469 {
470 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
471 }
472
473 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu32(__m128i __A,__m128i __B)474 _mm_comfalse_epu32(__m128i __A, __m128i __B)
475 {
476 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
477 }
478
479 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu32(__m128i __A,__m128i __B)480 _mm_comtrue_epu32(__m128i __A, __m128i __B)
481 {
482 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
483 }
484
485 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu64(__m128i __A,__m128i __B)486 _mm_comlt_epu64(__m128i __A, __m128i __B)
487 {
488 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
489 }
490
491 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu64(__m128i __A,__m128i __B)492 _mm_comle_epu64(__m128i __A, __m128i __B)
493 {
494 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
495 }
496
497 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu64(__m128i __A,__m128i __B)498 _mm_comgt_epu64(__m128i __A, __m128i __B)
499 {
500 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
501 }
502
503 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu64(__m128i __A,__m128i __B)504 _mm_comge_epu64(__m128i __A, __m128i __B)
505 {
506 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
507 }
508
509 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu64(__m128i __A,__m128i __B)510 _mm_comeq_epu64(__m128i __A, __m128i __B)
511 {
512 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
513 }
514
515 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu64(__m128i __A,__m128i __B)516 _mm_comneq_epu64(__m128i __A, __m128i __B)
517 {
518 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
519 }
520
521 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu64(__m128i __A,__m128i __B)522 _mm_comfalse_epu64(__m128i __A, __m128i __B)
523 {
524 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
525 }
526
527 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu64(__m128i __A,__m128i __B)528 _mm_comtrue_epu64(__m128i __A, __m128i __B)
529 {
530 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
531 }
532
533 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi8(__m128i __A,__m128i __B)534 _mm_comlt_epi8(__m128i __A, __m128i __B)
535 {
536 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
537 }
538
539 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi8(__m128i __A,__m128i __B)540 _mm_comle_epi8(__m128i __A, __m128i __B)
541 {
542 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
543 }
544
545 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi8(__m128i __A,__m128i __B)546 _mm_comgt_epi8(__m128i __A, __m128i __B)
547 {
548 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
549 }
550
551 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi8(__m128i __A,__m128i __B)552 _mm_comge_epi8(__m128i __A, __m128i __B)
553 {
554 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
555 }
556
557 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi8(__m128i __A,__m128i __B)558 _mm_comeq_epi8(__m128i __A, __m128i __B)
559 {
560 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
561 }
562
563 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi8(__m128i __A,__m128i __B)564 _mm_comneq_epi8(__m128i __A, __m128i __B)
565 {
566 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
567 }
568
569 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi8(__m128i __A,__m128i __B)570 _mm_comfalse_epi8(__m128i __A, __m128i __B)
571 {
572 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
573 }
574
575 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi8(__m128i __A,__m128i __B)576 _mm_comtrue_epi8(__m128i __A, __m128i __B)
577 {
578 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
579 }
580
581 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi16(__m128i __A,__m128i __B)582 _mm_comlt_epi16(__m128i __A, __m128i __B)
583 {
584 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
585 }
586
587 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi16(__m128i __A,__m128i __B)588 _mm_comle_epi16(__m128i __A, __m128i __B)
589 {
590 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
591 }
592
593 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi16(__m128i __A,__m128i __B)594 _mm_comgt_epi16(__m128i __A, __m128i __B)
595 {
596 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
597 }
598
599 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi16(__m128i __A,__m128i __B)600 _mm_comge_epi16(__m128i __A, __m128i __B)
601 {
602 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
603 }
604
605 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi16(__m128i __A,__m128i __B)606 _mm_comeq_epi16(__m128i __A, __m128i __B)
607 {
608 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
609 }
610
611 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi16(__m128i __A,__m128i __B)612 _mm_comneq_epi16(__m128i __A, __m128i __B)
613 {
614 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
615 }
616
617 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi16(__m128i __A,__m128i __B)618 _mm_comfalse_epi16(__m128i __A, __m128i __B)
619 {
620 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
621 }
622
623 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi16(__m128i __A,__m128i __B)624 _mm_comtrue_epi16(__m128i __A, __m128i __B)
625 {
626 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
627 }
628
629 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi32(__m128i __A,__m128i __B)630 _mm_comlt_epi32(__m128i __A, __m128i __B)
631 {
632 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
633 }
634
635 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi32(__m128i __A,__m128i __B)636 _mm_comle_epi32(__m128i __A, __m128i __B)
637 {
638 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
639 }
640
641 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi32(__m128i __A,__m128i __B)642 _mm_comgt_epi32(__m128i __A, __m128i __B)
643 {
644 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
645 }
646
647 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi32(__m128i __A,__m128i __B)648 _mm_comge_epi32(__m128i __A, __m128i __B)
649 {
650 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
651 }
652
653 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi32(__m128i __A,__m128i __B)654 _mm_comeq_epi32(__m128i __A, __m128i __B)
655 {
656 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
657 }
658
659 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi32(__m128i __A,__m128i __B)660 _mm_comneq_epi32(__m128i __A, __m128i __B)
661 {
662 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
663 }
664
665 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi32(__m128i __A,__m128i __B)666 _mm_comfalse_epi32(__m128i __A, __m128i __B)
667 {
668 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
669 }
670
671 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi32(__m128i __A,__m128i __B)672 _mm_comtrue_epi32(__m128i __A, __m128i __B)
673 {
674 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
675 }
676
677 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi64(__m128i __A,__m128i __B)678 _mm_comlt_epi64(__m128i __A, __m128i __B)
679 {
680 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
681 }
682
683 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi64(__m128i __A,__m128i __B)684 _mm_comle_epi64(__m128i __A, __m128i __B)
685 {
686 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
687 }
688
689 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi64(__m128i __A,__m128i __B)690 _mm_comgt_epi64(__m128i __A, __m128i __B)
691 {
692 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
693 }
694
695 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi64(__m128i __A,__m128i __B)696 _mm_comge_epi64(__m128i __A, __m128i __B)
697 {
698 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
699 }
700
701 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi64(__m128i __A,__m128i __B)702 _mm_comeq_epi64(__m128i __A, __m128i __B)
703 {
704 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
705 }
706
707 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi64(__m128i __A,__m128i __B)708 _mm_comneq_epi64(__m128i __A, __m128i __B)
709 {
710 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
711 }
712
713 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi64(__m128i __A,__m128i __B)714 _mm_comfalse_epi64(__m128i __A, __m128i __B)
715 {
716 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
717 }
718
719 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi64(__m128i __A,__m128i __B)720 _mm_comtrue_epi64(__m128i __A, __m128i __B)
721 {
722 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
723 }
724
/* Expands to __builtin_ia32_vpermil2pd on X and Y (each converted to __m128d,
 * then viewed as __v2df), C (converted to __m128i, then viewed as __v2di) and
 * I; yields __m128d. */
#define _mm_permute2_pd(X, Y, C, I) \
  ((__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), \
                                      (__v2di)(__m128i)(C), (I)))
729
/* Expands to __builtin_ia32_vpermil2pd256 on X and Y (each converted to
 * __m256d, then viewed as __v4df), C (converted to __m256i, then viewed as
 * __v4di) and I; yields __m256d. */
#define _mm256_permute2_pd(X, Y, C, I) \
  ((__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
                                         (__v4df)(__m256d)(Y), \
                                         (__v4di)(__m256i)(C), (I)))
734
/* Expands to __builtin_ia32_vpermil2ps on X and Y (each converted to __m128,
 * then viewed as __v4sf), C (converted to __m128i, then viewed as __v4si) and
 * I; yields __m128. */
#define _mm_permute2_ps(X, Y, C, I) \
  ((__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), \
                                     (__v4sf)(__m128)(Y), \
                                     (__v4si)(__m128i)(C), (I)))
738
/* Expands to __builtin_ia32_vpermil2ps256 on X and Y (each converted to
 * __m256, then viewed as __v8sf), C (converted to __m256i, then viewed as
 * __v8si) and I; yields __m256. */
#define _mm256_permute2_ps(X, Y, C, I) \
  ((__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
                                        (__v8sf)(__m256)(Y), \
                                        (__v8si)(__m256i)(C), (I)))
743
744 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)745 _mm_frcz_ss(__m128 __A)
746 {
747 return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
748 }
749
750 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_frcz_sd(__m128d __A)751 _mm_frcz_sd(__m128d __A)
752 {
753 return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
754 }
755
756 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ps(__m128 __A)757 _mm_frcz_ps(__m128 __A)
758 {
759 return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
760 }
761
762 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_frcz_pd(__m128d __A)763 _mm_frcz_pd(__m128d __A)
764 {
765 return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
766 }
767
768 static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_frcz_ps(__m256 __A)769 _mm256_frcz_ps(__m256 __A)
770 {
771 return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
772 }
773
774 static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_frcz_pd(__m256d __A)775 _mm256_frcz_pd(__m256d __A)
776 {
777 return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
778 }
779
780 #undef __DEFAULT_FN_ATTRS
781
782 #endif /* __XOPINTRIN_H */
783