/*===----------- avx10_2satcvtintrin.h - AVX10_2SATCVT intrinsics ----------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <avx10_2satcvtintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AVX10_2SATCVTINTRIN_H
#define __AVX10_2SATCVTINTRIN_H

#define _mm_ipcvtnebf16_epi8(A) \
  ((__m128i)__builtin_ia32_vcvtnebf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtnebf16_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvtnebf16_epi8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtnebf16_epi8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvtnebf16_epi8(A), \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvtnebf16_epi8(A) \
  ((__m256i)__builtin_ia32_vcvtnebf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtnebf16_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvtnebf16_epi8(A), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtnebf16_epi8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvtnebf16_epi8(A), \
                                       (__v16hi)_mm256_setzero_si256()))

#define _mm_ipcvtnebf16_epu8(A) \
  ((__m128i)__builtin_ia32_vcvtnebf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtnebf16_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvtnebf16_epu8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtnebf16_epu8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvtnebf16_epu8(A), \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvtnebf16_epu8(A) \
  ((__m256i)__builtin_ia32_vcvtnebf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtnebf16_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvtnebf16_epu8(A), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtnebf16_epu8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvtnebf16_epu8(A), \
                                       (__v16hi)_mm256_setzero_si256()))

#define _mm_ipcvtph_epi8(A) \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtph_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A), \
                                              (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtph_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtph_epi8(A) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtph_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
                                              (__v16hu)(W), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtph_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
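/* The *_round_* macros below take an explicit rounding-control immediate R in
 * place of _MM_FROUND_CUR_DIRECTION. A minimal usage sketch (assumes a
 * compiler and target with AVX10.2 enabled; the rounding value shown is one
 * common choice, not the only valid one):
 *
 *   __m256i r = _mm256_ipcvt_roundph_epi8(
 *       x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 */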
#define _mm256_ipcvt_roundph_epi8(A, R) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
                                              (__v16hu)_mm256_setzero_si256(), \
                                              (__mmask16)-1, (const int)R))

#define _mm256_mask_ipcvt_roundph_epi8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))

#define _mm256_maskz_ipcvt_roundph_epi8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
                                              (__v16hu)_mm256_setzero_si256(), \
                                              (__mmask16)(U), (const int)R))

#define _mm_ipcvtph_epu8(A) \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtph_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A), \
                                               (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtph_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtph_epu8(A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtph_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask((__v16hf)(__m256h)(A), \
                                               (__v16hu)(W), (__mmask16)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtph_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundph_epu8(A, R) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      (const int)R))

#define _mm256_mask_ipcvt_roundph_epu8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))

#define _mm256_maskz_ipcvt_roundph_epu8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \
      (const int)R))

#define _mm_ipcvtps_epi8(A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtps_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A), \
                                              (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtps_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtps_epi8(A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtps_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
                                              (__v8su)(W), (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtps_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundps_epi8(A, R) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
                                              (__v8su)_mm256_setzero_si256(), \
                                              (__mmask8)-1, (const int)R))
#define _mm256_mask_ipcvt_roundps_epi8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))

#define _mm256_maskz_ipcvt_roundps_epi8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
                                              (__v8su)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (const int)R))

#define _mm_ipcvtps_epu8(A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtps_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A), \
                                               (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtps_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtps_epu8(A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtps_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)(W), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtps_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundps_epu8(A, R) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)_mm256_setzero_si256(), \
                                               (__mmask8)-1, (const int)R))

#define _mm256_mask_ipcvt_roundps_epu8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))

#define _mm256_maskz_ipcvt_roundps_epu8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)_mm256_setzero_si256(), \
                                               (__mmask8)(U), (const int)R))

#define _mm_ipcvttnebf16_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttnebf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvttnebf16_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvttnebf16_epi8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvttnebf16_epi8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvttnebf16_epi8(A), \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvttnebf16_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttnebf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvttnebf16_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvttnebf16_epi8(A), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvttnebf16_epi8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvttnebf16_epi8(A), \
                                       (__v16hi)_mm256_setzero_si256()))

#define _mm_ipcvttnebf16_epu8(A) \
  ((__m128i)__builtin_ia32_vcvttnebf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvttnebf16_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvttnebf16_epu8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvttnebf16_epu8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvttnebf16_epu8(A), \
                                       (__v8hi)_mm_setzero_si128()))
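/* Note: the bf16 conversions above and below have no masked builtin, so their
 * mask/maskz forms are composed from the unmasked conversion plus a
 * __builtin_ia32_selectw_* blend of the result words. */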
#define _mm256_ipcvttnebf16_epu8(A) \
  ((__m256i)__builtin_ia32_vcvttnebf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvttnebf16_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvttnebf16_epu8(A), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvttnebf16_epu8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvttnebf16_epu8(A), \
                                       (__v16hi)_mm256_setzero_si256()))

#define _mm_ipcvttph_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttph_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A), \
                                               (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttph_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttph_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttph_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask((__v16hf)(__m256h)(A), \
                                               (__v16hu)(W), (__mmask16)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttph_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundph_epi8(A, R) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      (const int)R))

#define _mm256_mask_ipcvtt_roundph_epi8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))

#define _mm256_maskz_ipcvtt_roundph_epi8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \
      (const int)R))

#define _mm_ipcvttph_epu8(A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttph_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A), \
                                                (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttph_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttph_epu8(A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttph_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask((__v16hf)(__m256h)(A), \
                                                (__v16hu)(W), (__mmask16)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttph_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundph_epu8(A, R) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      (const int)R))
#define _mm256_mask_ipcvtt_roundph_epu8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))

#define _mm256_maskz_ipcvtt_roundph_epu8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \
      (const int)R))

#define _mm_ipcvttps_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttps_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A), \
                                               (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttps_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttps_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttps_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)(W), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttps_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundps_epi8(A, R) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)_mm256_setzero_si256(), \
                                               (__mmask8)-1, (const int)R))

#define _mm256_mask_ipcvtt_roundps_epi8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))

#define _mm256_maskz_ipcvtt_roundps_epi8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)_mm256_setzero_si256(), \
                                               (__mmask8)(U), (const int)R))

#define _mm_ipcvttps_epu8(A) \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttps_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A), \
                                                (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttps_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttps_epu8(A) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttps_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask((__v8sf)(__m256)(A), \
                                                (__v8su)(W), (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttps_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundps_epu8(A, R) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      (const int)R))

#define _mm256_mask_ipcvtt_roundps_epu8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))
#define _mm256_maskz_ipcvtt_roundps_epu8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \
      (const int)R))

#endif // __AVX10_2SATCVTINTRIN_H