• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*===----------- avx10_2satcvtintrin.h - AVX10_2SATCVT intrinsics ----------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 #ifndef __IMMINTRIN_H
10 #error                                                                         \
11     "Never use <avx10_2satcvtintrin.h> directly; include <immintrin.h> instead."
12 #endif // __IMMINTRIN_H
13 
14 #ifndef __AVX10_2SATCVTINTRIN_H
15 #define __AVX10_2SATCVTINTRIN_H
16 
/// Packed BF16 conversions routed through the "vcvtnebf162ibs" builtins.
///
/// Three flavours exist per vector width:
///   - plain : A is cast via __m128bh / __m256bh and handed to the builtin.
///   - mask  : the plain result and W are passed, together with mask U, to
///             the 16-bit-lane select builtin.
///   - maskz : the mask flavour with an all-zero vector in place of W.
/// NOTE(review): presumably the select keeps the converted lane where the
/// matching bit of U is set and the W lane otherwise -- confirm against the
/// select builtin's definition.

// 128-bit forms.
#define _mm_ipcvtnebf16_epi8(A)                                                \
  ((__m128i)__builtin_ia32_vcvtnebf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtnebf16_epi8(W, U, A)                                     \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
                                       (__v8hi)_mm_ipcvtnebf16_epi8(A),        \
                                       (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtnebf16_epi8(U, A)                                       \
  _mm_mask_ipcvtnebf16_epi8(_mm_setzero_si128(), U, A)

// 256-bit forms.
#define _mm256_ipcvtnebf16_epi8(A)                                             \
  ((__m256i)__builtin_ia32_vcvtnebf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtnebf16_epi8(W, U, A)                                  \
  ((__m256i)__builtin_ia32_selectw_256(                                        \
      (__mmask16)(U), (__v16hi)_mm256_ipcvtnebf16_epi8(A),                     \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtnebf16_epi8(U, A)                                    \
  _mm256_mask_ipcvtnebf16_epi8(_mm256_setzero_si256(), U, A)
41 
/// Packed BF16 conversions routed through the "vcvtnebf162iubs" builtins.
///
/// Three flavours exist per vector width:
///   - plain : A is cast via __m128bh / __m256bh and handed to the builtin.
///   - mask  : the plain result and W are passed, together with mask U, to
///             the 16-bit-lane select builtin.
///   - maskz : the mask flavour with an all-zero vector in place of W.
/// NOTE(review): the "epu8"/"iubs" naming suggests an unsigned result range;
/// confirm against the ISA reference.

// 128-bit forms.
#define _mm_ipcvtnebf16_epu8(A)                                                \
  ((__m128i)__builtin_ia32_vcvtnebf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtnebf16_epu8(W, U, A)                                     \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
                                       (__v8hi)_mm_ipcvtnebf16_epu8(A),        \
                                       (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtnebf16_epu8(U, A)                                       \
  _mm_mask_ipcvtnebf16_epu8(_mm_setzero_si128(), U, A)

// 256-bit forms.
#define _mm256_ipcvtnebf16_epu8(A)                                             \
  ((__m256i)__builtin_ia32_vcvtnebf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtnebf16_epu8(W, U, A)                                  \
  ((__m256i)__builtin_ia32_selectw_256(                                        \
      (__mmask16)(U), (__v16hi)_mm256_ipcvtnebf16_epu8(A),                     \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtnebf16_epu8(U, A)                                    \
  _mm256_mask_ipcvtnebf16_epu8(_mm256_setzero_si256(), U, A)
66 
/// Packed half-precision conversions through the "vcvtph2ibs" mask builtins.
///
/// The mask flavour is the primitive: it forwards A (cast via __m128h /
/// __m256h), the pass-through vector W and the mask U to the builtin.  The
/// other two flavours are expressed in terms of it:
///   - plain : W is an all-zero vector and the mask is all ones.
///   - maskz : W is an all-zero vector, U is supplied by the caller.
/// The 256-bit builtin takes one extra trailing argument, which these
/// non-rounding forms fix to _MM_FROUND_CUR_DIRECTION.

// 128-bit forms.
#define _mm_mask_ipcvtph_epi8(W, U, A)                                         \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask(                                 \
      (__v8hf)(__m128h)(A), (__v8hu)(W), (__mmask8)(U)))

#define _mm_ipcvtph_epi8(A)                                                    \
  _mm_mask_ipcvtph_epi8(_mm_setzero_si128(), (__mmask8)-1, A)

#define _mm_maskz_ipcvtph_epi8(U, A)                                           \
  _mm_mask_ipcvtph_epi8(_mm_setzero_si128(), U, A)

// 256-bit forms.
#define _mm256_mask_ipcvtph_epi8(W, U, A)                                      \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U),                     \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtph_epi8(A)                                                 \
  _mm256_mask_ipcvtph_epi8(_mm256_setzero_si256(), (__mmask16)-1, A)

#define _mm256_maskz_ipcvtph_epi8(U, A)                                        \
  _mm256_mask_ipcvtph_epi8(_mm256_setzero_si256(), U, A)
93 
/// 256-bit "vcvtph2ibs" conversions with a caller-supplied final argument R.
///
///   - plain : all-zero pass-through vector, all-ones mask.
///   - mask  : pass-through vector W, mask U.
///   - maskz : all-zero pass-through vector, mask U.
///
/// R is forwarded as the builtin's last argument after a cast to const int.
/// It is wrapped in parentheses so the cast covers the whole argument
/// expression rather than only its first operand when callers pass a
/// compound expression such as `X | Y`.
/// NOTE(review): R is presumably a compile-time _MM_FROUND_* rounding/SAE
/// selector -- confirm against the builtin's definition.
#define _mm256_ipcvt_roundph_epi8(A, R)                                        \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
      (const int)(R)))

#define _mm256_mask_ipcvt_roundph_epi8(W, U, A, R)                             \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)(R)))

#define _mm256_maskz_ipcvt_roundph_epi8(U, A, R)                               \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U),  \
      (const int)(R)))
107 
/// Packed half-precision conversions through the "vcvtph2iubs" mask builtins.
///
/// The mask flavour is the primitive: it forwards A (cast via __m128h /
/// __m256h), the pass-through vector W and the mask U to the builtin.  The
/// other two flavours are expressed in terms of it:
///   - plain : W is an all-zero vector and the mask is all ones.
///   - maskz : W is an all-zero vector, U is supplied by the caller.
/// The 256-bit builtin takes one extra trailing argument, which these
/// non-rounding forms fix to _MM_FROUND_CUR_DIRECTION.

// 128-bit forms.
#define _mm_mask_ipcvtph_epu8(W, U, A)                                         \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask(                                \
      (__v8hf)(__m128h)(A), (__v8hu)(W), (__mmask8)(U)))

#define _mm_ipcvtph_epu8(A)                                                    \
  _mm_mask_ipcvtph_epu8(_mm_setzero_si128(), (__mmask8)-1, A)

#define _mm_maskz_ipcvtph_epu8(U, A)                                           \
  _mm_mask_ipcvtph_epu8(_mm_setzero_si128(), U, A)

// 256-bit forms.
#define _mm256_mask_ipcvtph_epu8(W, U, A)                                      \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U),                     \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtph_epu8(A)                                                 \
  _mm256_mask_ipcvtph_epu8(_mm256_setzero_si256(), (__mmask16)-1, A)

#define _mm256_maskz_ipcvtph_epu8(U, A)                                        \
  _mm256_mask_ipcvtph_epu8(_mm256_setzero_si256(), U, A)
134 
/// 256-bit "vcvtph2iubs" conversions with a caller-supplied final argument R.
///
///   - plain : all-zero pass-through vector, all-ones mask.
///   - mask  : pass-through vector W, mask U.
///   - maskz : all-zero pass-through vector, mask U.
///
/// R is forwarded as the builtin's last argument after a cast to const int.
/// It is wrapped in parentheses so the cast covers the whole argument
/// expression rather than only its first operand when callers pass a
/// compound expression such as `X | Y`.
/// NOTE(review): R is presumably a compile-time _MM_FROUND_* rounding/SAE
/// selector -- confirm against the builtin's definition.
#define _mm256_ipcvt_roundph_epu8(A, R)                                        \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
      (const int)(R)))

#define _mm256_mask_ipcvt_roundph_epu8(W, U, A, R)                             \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)(R)))

#define _mm256_maskz_ipcvt_roundph_epu8(U, A, R)                               \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U),  \
      (const int)(R)))
148 
/// Packed single-precision conversions through the "vcvtps2ibs" mask builtins.
///
/// The mask flavour is the primitive: it forwards A (cast via __m128 /
/// __m256), the pass-through vector W and the mask U to the builtin.  The
/// other two flavours are expressed in terms of it:
///   - plain : W is an all-zero vector and the mask is all ones.
///   - maskz : W is an all-zero vector, U is supplied by the caller.
/// Both widths use an __mmask8 mask.  The 256-bit builtin takes one extra
/// trailing argument, which these non-rounding forms fix to
/// _MM_FROUND_CUR_DIRECTION.

// 128-bit forms.
#define _mm_mask_ipcvtps_epi8(W, U, A)                                         \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask(                                 \
      (__v4sf)(__m128)(A), (__v4su)(W), (__mmask8)(U)))

#define _mm_ipcvtps_epi8(A)                                                    \
  _mm_mask_ipcvtps_epi8(_mm_setzero_si128(), (__mmask8)-1, A)

#define _mm_maskz_ipcvtps_epi8(U, A)                                           \
  _mm_mask_ipcvtps_epi8(_mm_setzero_si128(), U, A)

// 256-bit forms.
#define _mm256_mask_ipcvtps_epi8(W, U, A)                                      \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U),                         \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtps_epi8(A)                                                 \
  _mm256_mask_ipcvtps_epi8(_mm256_setzero_si256(), (__mmask8)-1, A)

#define _mm256_maskz_ipcvtps_epi8(U, A)                                        \
  _mm256_mask_ipcvtps_epi8(_mm256_setzero_si256(), U, A)
175 
/// 256-bit "vcvtps2ibs" conversions with a caller-supplied final argument R.
///
///   - plain : all-zero pass-through vector, all-ones mask.
///   - mask  : pass-through vector W, mask U.
///   - maskz : all-zero pass-through vector, mask U.
///
/// R is forwarded as the builtin's last argument after a cast to const int.
/// It is wrapped in parentheses so the cast covers the whole argument
/// expression rather than only its first operand when callers pass a
/// compound expression such as `X | Y`.
/// NOTE(review): R is presumably a compile-time _MM_FROUND_* rounding/SAE
/// selector -- confirm against the builtin's definition.
#define _mm256_ipcvt_roundps_epi8(A, R)                                        \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
      (const int)(R)))

#define _mm256_mask_ipcvt_roundps_epi8(W, U, A, R)                             \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)(R)))

#define _mm256_maskz_ipcvt_roundps_epi8(U, A, R)                               \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U),      \
      (const int)(R)))
189 
/// Packed single-precision conversions through the "vcvtps2iubs" mask
/// builtins.
///
/// The mask flavour is the primitive: it forwards A (cast via __m128 /
/// __m256), the pass-through vector W and the mask U to the builtin.  The
/// other two flavours are expressed in terms of it:
///   - plain : W is an all-zero vector and the mask is all ones.
///   - maskz : W is an all-zero vector, U is supplied by the caller.
/// Both widths use an __mmask8 mask.  The 256-bit builtin takes one extra
/// trailing argument, which these non-rounding forms fix to
/// _MM_FROUND_CUR_DIRECTION.

// 128-bit forms.
#define _mm_mask_ipcvtps_epu8(W, U, A)                                         \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask(                                \
      (__v4sf)(__m128)(A), (__v4su)(W), (__mmask8)(U)))

#define _mm_ipcvtps_epu8(A)                                                    \
  _mm_mask_ipcvtps_epu8(_mm_setzero_si128(), (__mmask8)-1, A)

#define _mm_maskz_ipcvtps_epu8(U, A)                                           \
  _mm_mask_ipcvtps_epu8(_mm_setzero_si128(), U, A)

// 256-bit forms.
#define _mm256_mask_ipcvtps_epu8(W, U, A)                                      \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U),                         \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtps_epu8(A)                                                 \
  _mm256_mask_ipcvtps_epu8(_mm256_setzero_si256(), (__mmask8)-1, A)

#define _mm256_maskz_ipcvtps_epu8(U, A)                                        \
  _mm256_mask_ipcvtps_epu8(_mm256_setzero_si256(), U, A)
216 
/// 256-bit "vcvtps2iubs" conversions with a caller-supplied final argument R.
///
///   - plain : all-zero pass-through vector, all-ones mask.
///   - mask  : pass-through vector W, mask U.
///   - maskz : all-zero pass-through vector, mask U.
///
/// R is forwarded as the builtin's last argument after a cast to const int.
/// It is wrapped in parentheses so the cast covers the whole argument
/// expression rather than only its first operand when callers pass a
/// compound expression such as `X | Y`.
/// NOTE(review): R is presumably a compile-time _MM_FROUND_* rounding/SAE
/// selector -- confirm against the builtin's definition.
#define _mm256_ipcvt_roundps_epu8(A, R)                                        \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
      (const int)(R)))

#define _mm256_mask_ipcvt_roundps_epu8(W, U, A, R)                             \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)(R)))

#define _mm256_maskz_ipcvt_roundps_epu8(U, A, R)                               \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U),      \
      (const int)(R)))
230 
/// Packed BF16 conversions routed through the "vcvttnebf162ibs" builtins.
///
/// Three flavours exist per vector width:
///   - plain : A is cast via __m128bh / __m256bh and handed to the builtin.
///   - mask  : the plain result and W are passed, together with mask U, to
///             the 16-bit-lane select builtin.
///   - maskz : the mask flavour with an all-zero vector in place of W.
/// NOTE(review): the extra "t" in the names presumably marks the truncating
/// variant of the conversion -- confirm against the ISA reference.

// 128-bit forms.
#define _mm_ipcvttnebf16_epi8(A)                                               \
  ((__m128i)__builtin_ia32_vcvttnebf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvttnebf16_epi8(W, U, A)                                    \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
                                       (__v8hi)_mm_ipcvttnebf16_epi8(A),       \
                                       (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvttnebf16_epi8(U, A)                                      \
  _mm_mask_ipcvttnebf16_epi8(_mm_setzero_si128(), U, A)

// 256-bit forms.
#define _mm256_ipcvttnebf16_epi8(A)                                            \
  ((__m256i)__builtin_ia32_vcvttnebf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvttnebf16_epi8(W, U, A)                                 \
  ((__m256i)__builtin_ia32_selectw_256(                                        \
      (__mmask16)(U), (__v16hi)_mm256_ipcvttnebf16_epi8(A),                    \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvttnebf16_epi8(U, A)                                   \
  _mm256_mask_ipcvttnebf16_epi8(_mm256_setzero_si256(), U, A)
255 
/// Packed BF16 conversions routed through the "vcvttnebf162iubs" builtins.
///
/// Three flavours exist per vector width:
///   - plain : A is cast via __m128bh / __m256bh and handed to the builtin.
///   - mask  : the plain result and W are passed, together with mask U, to
///             the 16-bit-lane select builtin.
///   - maskz : the mask flavour with an all-zero vector in place of W.
/// NOTE(review): the extra "t" in the names presumably marks the truncating
/// variant of the conversion -- confirm against the ISA reference.

// 128-bit forms.
#define _mm_ipcvttnebf16_epu8(A)                                               \
  ((__m128i)__builtin_ia32_vcvttnebf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvttnebf16_epu8(W, U, A)                                    \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
                                       (__v8hi)_mm_ipcvttnebf16_epu8(A),       \
                                       (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvttnebf16_epu8(U, A)                                      \
  _mm_mask_ipcvttnebf16_epu8(_mm_setzero_si128(), U, A)

// 256-bit forms.
#define _mm256_ipcvttnebf16_epu8(A)                                            \
  ((__m256i)__builtin_ia32_vcvttnebf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvttnebf16_epu8(W, U, A)                                 \
  ((__m256i)__builtin_ia32_selectw_256(                                        \
      (__mmask16)(U), (__v16hi)_mm256_ipcvttnebf16_epu8(A),                    \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvttnebf16_epu8(U, A)                                   \
  _mm256_mask_ipcvttnebf16_epu8(_mm256_setzero_si256(), U, A)
280 
/// Packed half-precision conversions through the "vcvttph2ibs" mask builtins.
///
/// The mask flavour is the primitive: it forwards A (cast via __m128h /
/// __m256h), the pass-through vector W and the mask U to the builtin.  The
/// other two flavours are expressed in terms of it:
///   - plain : W is an all-zero vector and the mask is all ones.
///   - maskz : W is an all-zero vector, U is supplied by the caller.
/// The 256-bit builtin takes one extra trailing argument, which these
/// non-rounding forms fix to _MM_FROUND_CUR_DIRECTION.

// 128-bit forms.
#define _mm_mask_ipcvttph_epi8(W, U, A)                                        \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask(                                \
      (__v8hf)(__m128h)(A), (__v8hu)(W), (__mmask8)(U)))

#define _mm_ipcvttph_epi8(A)                                                   \
  _mm_mask_ipcvttph_epi8(_mm_setzero_si128(), (__mmask8)-1, A)

#define _mm_maskz_ipcvttph_epi8(U, A)                                          \
  _mm_mask_ipcvttph_epi8(_mm_setzero_si128(), U, A)

// 256-bit forms.
#define _mm256_mask_ipcvttph_epi8(W, U, A)                                     \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U),                     \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvttph_epi8(A)                                                \
  _mm256_mask_ipcvttph_epi8(_mm256_setzero_si256(), (__mmask16)-1, A)

#define _mm256_maskz_ipcvttph_epi8(U, A)                                       \
  _mm256_mask_ipcvttph_epi8(_mm256_setzero_si256(), U, A)
307 
/// 256-bit "vcvttph2ibs" conversions with a caller-supplied final argument R.
///
///   - plain : all-zero pass-through vector, all-ones mask.
///   - mask  : pass-through vector W, mask U.
///   - maskz : all-zero pass-through vector, mask U.
///
/// R is forwarded as the builtin's last argument after a cast to const int.
/// It is wrapped in parentheses so the cast covers the whole argument
/// expression rather than only its first operand when callers pass a
/// compound expression such as `X | Y`.
/// NOTE(review): R is presumably a compile-time _MM_FROUND_* SAE selector --
/// confirm against the builtin's definition.
#define _mm256_ipcvtt_roundph_epi8(A, R)                                       \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
      (const int)(R)))

#define _mm256_mask_ipcvtt_roundph_epi8(W, U, A, R)                            \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)(R)))

#define _mm256_maskz_ipcvtt_roundph_epi8(U, A, R)                              \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U),  \
      (const int)(R)))
321 
/// Packed half-precision conversions through the "vcvttph2iubs" mask
/// builtins.
///
/// The mask flavour is the primitive: it forwards A (cast via __m128h /
/// __m256h), the pass-through vector W and the mask U to the builtin.  The
/// other two flavours are expressed in terms of it:
///   - plain : W is an all-zero vector and the mask is all ones.
///   - maskz : W is an all-zero vector, U is supplied by the caller.
/// The 256-bit builtin takes one extra trailing argument, which these
/// non-rounding forms fix to _MM_FROUND_CUR_DIRECTION.

// 128-bit forms.
#define _mm_mask_ipcvttph_epu8(W, U, A)                                        \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hu)(W), (__mmask8)(U)))

#define _mm_ipcvttph_epu8(A)                                                   \
  _mm_mask_ipcvttph_epu8(_mm_setzero_si128(), (__mmask8)-1, A)

#define _mm_maskz_ipcvttph_epu8(U, A)                                          \
  _mm_mask_ipcvttph_epu8(_mm_setzero_si128(), U, A)

// 256-bit forms.
#define _mm256_mask_ipcvttph_epu8(W, U, A)                                     \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U),                     \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvttph_epu8(A)                                                \
  _mm256_mask_ipcvttph_epu8(_mm256_setzero_si256(), (__mmask16)-1, A)

#define _mm256_maskz_ipcvttph_epu8(U, A)                                       \
  _mm256_mask_ipcvttph_epu8(_mm256_setzero_si256(), U, A)
348 
/// 256-bit "vcvttph2iubs" conversions with a caller-supplied final argument R.
///
///   - plain : all-zero pass-through vector, all-ones mask.
///   - mask  : pass-through vector W, mask U.
///   - maskz : all-zero pass-through vector, mask U.
///
/// R is forwarded as the builtin's last argument after a cast to const int.
/// It is wrapped in parentheses so the cast covers the whole argument
/// expression rather than only its first operand when callers pass a
/// compound expression such as `X | Y`.
/// NOTE(review): R is presumably a compile-time _MM_FROUND_* SAE selector --
/// confirm against the builtin's definition.
#define _mm256_ipcvtt_roundph_epu8(A, R)                                       \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
      (const int)(R)))

#define _mm256_mask_ipcvtt_roundph_epu8(W, U, A, R)                            \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)(R)))

#define _mm256_maskz_ipcvtt_roundph_epu8(U, A, R)                              \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U),  \
      (const int)(R)))
362 
/// Packed single-precision conversions through the "vcvttps2ibs" mask
/// builtins.
///
/// The mask flavour is the primitive: it forwards A (cast via __m128 /
/// __m256), the pass-through vector W and the mask U to the builtin.  The
/// other two flavours are expressed in terms of it:
///   - plain : W is an all-zero vector and the mask is all ones.
///   - maskz : W is an all-zero vector, U is supplied by the caller.
/// Both widths use an __mmask8 mask.  The 256-bit builtin takes one extra
/// trailing argument, which these non-rounding forms fix to
/// _MM_FROUND_CUR_DIRECTION.

// 128-bit forms.
#define _mm_mask_ipcvttps_epi8(W, U, A)                                        \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask(                                \
      (__v4sf)(__m128)(A), (__v4su)(W), (__mmask8)(U)))

#define _mm_ipcvttps_epi8(A)                                                   \
  _mm_mask_ipcvttps_epi8(_mm_setzero_si128(), (__mmask8)-1, A)

#define _mm_maskz_ipcvttps_epi8(U, A)                                          \
  _mm_mask_ipcvttps_epi8(_mm_setzero_si128(), U, A)

// 256-bit forms.
#define _mm256_mask_ipcvttps_epi8(W, U, A)                                     \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U),                         \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvttps_epi8(A)                                                \
  _mm256_mask_ipcvttps_epi8(_mm256_setzero_si256(), (__mmask8)-1, A)

#define _mm256_maskz_ipcvttps_epi8(U, A)                                       \
  _mm256_mask_ipcvttps_epi8(_mm256_setzero_si256(), U, A)
389 
/// 256-bit "vcvttps2ibs" conversions with a caller-supplied final argument R.
///
///   - plain : all-zero pass-through vector, all-ones mask.
///   - mask  : pass-through vector W, mask U.
///   - maskz : all-zero pass-through vector, mask U.
///
/// R is forwarded as the builtin's last argument after a cast to const int.
/// It is wrapped in parentheses so the cast covers the whole argument
/// expression rather than only its first operand when callers pass a
/// compound expression such as `X | Y`.
/// NOTE(review): R is presumably a compile-time _MM_FROUND_* SAE selector --
/// confirm against the builtin's definition.
#define _mm256_ipcvtt_roundps_epi8(A, R)                                       \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
      (const int)(R)))

#define _mm256_mask_ipcvtt_roundps_epi8(W, U, A, R)                            \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)(R)))

#define _mm256_maskz_ipcvtt_roundps_epi8(U, A, R)                              \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U),      \
      (const int)(R)))
403 
/// Packed single-precision conversions through the "vcvttps2iubs" mask
/// builtins.
///
/// The mask flavour is the primitive: it forwards A (cast via __m128 /
/// __m256), the pass-through vector W and the mask U to the builtin.  The
/// other two flavours are expressed in terms of it:
///   - plain : W is an all-zero vector and the mask is all ones.
///   - maskz : W is an all-zero vector, U is supplied by the caller.
/// Both widths use an __mmask8 mask.  The 256-bit builtin takes one extra
/// trailing argument, which these non-rounding forms fix to
/// _MM_FROUND_CUR_DIRECTION.

// 128-bit forms.
#define _mm_mask_ipcvttps_epu8(W, U, A)                                        \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask(                               \
      (__v4sf)(__m128)(A), (__v4su)(W), (__mmask8)(U)))

#define _mm_ipcvttps_epu8(A)                                                   \
  _mm_mask_ipcvttps_epu8(_mm_setzero_si128(), (__mmask8)-1, A)

#define _mm_maskz_ipcvttps_epu8(U, A)                                          \
  _mm_mask_ipcvttps_epu8(_mm_setzero_si128(), U, A)

// 256-bit forms.
#define _mm256_mask_ipcvttps_epu8(W, U, A)                                     \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U),                         \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvttps_epu8(A)                                                \
  _mm256_mask_ipcvttps_epu8(_mm256_setzero_si256(), (__mmask8)-1, A)

#define _mm256_maskz_ipcvttps_epu8(U, A)                                       \
  _mm256_mask_ipcvttps_epu8(_mm256_setzero_si256(), U, A)
430 
/// 256-bit "vcvttps2iubs" conversions with a caller-supplied final argument R.
///
///   - plain : all-zero pass-through vector, all-ones mask.
///   - mask  : pass-through vector W, mask U.
///   - maskz : all-zero pass-through vector, mask U.
///
/// R is forwarded as the builtin's last argument after a cast to const int.
/// It is wrapped in parentheses so the cast covers the whole argument
/// expression rather than only its first operand when callers pass a
/// compound expression such as `X | Y`.
/// NOTE(review): R is presumably a compile-time _MM_FROUND_* SAE selector --
/// confirm against the builtin's definition.
#define _mm256_ipcvtt_roundps_epu8(A, R)                                       \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
      (const int)(R)))

#define _mm256_mask_ipcvtt_roundps_epu8(W, U, A, R)                            \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)(R)))

#define _mm256_maskz_ipcvtt_roundps_epu8(U, A, R)                              \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U),      \
      (const int)(R)))
444 #endif // __AVX10_2SATCVTINTRIN_H
445