1 /*===------ avx10_2_512satcvtintrin.h - AVX10_2_512SATCVT intrinsics -------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 #ifndef __IMMINTRIN_H 10 #error \ 11 "Never use <avx10_2_512satcvtintrin.h> directly; include <immintrin.h> instead." 12 #endif // __IMMINTRIN_H 13 14 #ifndef __AVX10_2_512SATCVTINTRIN_H 15 #define __AVX10_2_512SATCVTINTRIN_H 16 17 #define _mm512_ipcvtnebf16_epi8(A) \ 18 ((__m512i)__builtin_ia32_vcvtnebf162ibs512((__v32bf)(__m512bh)(A))) 19 20 #define _mm512_mask_ipcvtnebf16_epi8(W, U, A) \ 21 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 22 (__v32hi)_mm512_ipcvtnebf16_epi8(A), \ 23 (__v32hi)(__m512i)(W))) 24 25 #define _mm512_maskz_ipcvtnebf16_epi8(U, A) \ 26 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 27 (__v32hi)_mm512_ipcvtnebf16_epi8(A), \ 28 (__v32hi)_mm512_setzero_si512())) 29 30 #define _mm512_ipcvtnebf16_epu8(A) \ 31 ((__m512i)__builtin_ia32_vcvtnebf162iubs512((__v32bf)(__m512bh)(A))) 32 33 #define _mm512_mask_ipcvtnebf16_epu8(W, U, A) \ 34 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 35 (__v32hi)_mm512_ipcvtnebf16_epu8(A), \ 36 (__v32hi)(__m512i)(W))) 37 38 #define _mm512_maskz_ipcvtnebf16_epu8(U, A) \ 39 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 40 (__v32hi)_mm512_ipcvtnebf16_epu8(A), \ 41 (__v32hi)_mm512_setzero_si512())) 42 43 #define _mm512_ipcvttnebf16_epi8(A) \ 44 ((__m512i)__builtin_ia32_vcvttnebf162ibs512((__v32bf)(__m512bh)(A))) 45 46 #define _mm512_mask_ipcvttnebf16_epi8(W, U, A) \ 47 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 48 (__v32hi)_mm512_ipcvttnebf16_epi8(A), \ 49 (__v32hi)(__m512i)(W))) 50 51 #define _mm512_maskz_ipcvttnebf16_epi8(U, A) \ 52 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 53 (__v32hi)_mm512_ipcvttnebf16_epi8(A), \ 54 (__v32hi)_mm512_setzero_si512())) 55 56 #define _mm512_ipcvttnebf16_epu8(A) \ 57 ((__m512i)__builtin_ia32_vcvttnebf162iubs512((__v32bf)(__m512bh)(A))) 58 59 #define _mm512_mask_ipcvttnebf16_epu8(W, U, A) \ 60 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 61 (__v32hi)_mm512_ipcvttnebf16_epu8(A), \ 62 (__v32hi)(__m512i)(W))) 63 64 #define _mm512_maskz_ipcvttnebf16_epu8(U, A) \ 65 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 66 (__v32hi)_mm512_ipcvttnebf16_epu8(A), \ 67 (__v32hi)_mm512_setzero_si512())) 68 69 #define _mm512_ipcvtph_epi8(A) \ 70 ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \ 71 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ 72 _MM_FROUND_CUR_DIRECTION)) 73 74 #define _mm512_mask_ipcvtph_epi8(W, U, A) \ 75 ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \ 76 (__v32hu)(W), (__mmask32)(U), \ 77 _MM_FROUND_CUR_DIRECTION)) 78 79 #define _mm512_maskz_ipcvtph_epi8(U, A) \ 80 ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \ 81 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ 82 _MM_FROUND_CUR_DIRECTION)) 83 84 #define _mm512_ipcvt_roundph_epi8(A, R) \ 85 ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \ 86 (__v32hu)_mm512_setzero_si512(), \ 87 (__mmask32)-1, (const int)R)) 88 89 #define _mm512_mask_ipcvt_roundph_epi8(W, U, A, R) \ 90 ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \ 91 (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R)) 92 93 #define _mm512_maskz_ipcvt_roundph_epi8(U, A, R) \ 94 ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \ 95 (__v32hu)_mm512_setzero_si512(), \ 96 (__mmask32)(U), (const int)R)) 97 98 #define _mm512_ipcvtph_epu8(A) \ 99 ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ 100 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ 101 _MM_FROUND_CUR_DIRECTION)) 102 103 #define _mm512_mask_ipcvtph_epu8(W, U, A) \ 104 ((__m512i)__builtin_ia32_vcvtph2iubs512_mask((__v32hf)(__m512h)(A), \ 105 (__v32hu)(W), (__mmask32)(U), \ 106 _MM_FROUND_CUR_DIRECTION)) 107 108 #define _mm512_maskz_ipcvtph_epu8(U, A) \ 109 ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ 110 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ 111 _MM_FROUND_CUR_DIRECTION)) 112 113 #define _mm512_ipcvt_roundph_epu8(A, R) \ 114 ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ 115 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ 116 (const int)R)) 117 118 #define _mm512_mask_ipcvt_roundph_epu8(W, U, A, R) \ 119 ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ 120 (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R)) 121 122 #define _mm512_maskz_ipcvt_roundph_epu8(U, A, R) \ 123 ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ 124 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ 125 (const int)R)) 126 127 #define _mm512_ipcvtps_epi8(A) \ 128 ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \ 129 (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ 130 _MM_FROUND_CUR_DIRECTION)) 131 132 #define _mm512_mask_ipcvtps_epi8(W, U, A) \ 133 ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \ 134 (__v16su)(W), (__mmask16)(U), \ 135 _MM_FROUND_CUR_DIRECTION)) 136 137 #define _mm512_maskz_ipcvtps_epi8(U, A) \ 138 ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \ 139 (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ 140 _MM_FROUND_CUR_DIRECTION)) 141 142 #define _mm512_ipcvt_roundps_epi8(A, R) \ 143 ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \ 144 (__v16su)_mm512_setzero_si512(), \ 145 (__mmask16)-1, (const int)R)) 146 147 #define _mm512_mask_ipcvt_roundps_epi8(W, U, A, R) \ 148 ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \ 149 (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R)) 150 151 #define _mm512_maskz_ipcvt_roundps_epi8(U, A, R) \ 152 ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \ 153 (__v16su)_mm512_setzero_si512(), \ 154 (__mmask16)(U), (const int)R)) 155 156 #define _mm512_ipcvtps_epu8(A) \ 157 ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ 158 (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ 159 _MM_FROUND_CUR_DIRECTION)) 160 161 #define _mm512_mask_ipcvtps_epu8(W, U, A) \ 162 ((__m512i)__builtin_ia32_vcvtps2iubs512_mask((__v16sf)(__m512)(A), \ 163 (__v16su)(W), (__mmask16)(U), \ 164 _MM_FROUND_CUR_DIRECTION)) 165 166 #define _mm512_maskz_ipcvtps_epu8(U, A) \ 167 ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ 168 (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ 169 _MM_FROUND_CUR_DIRECTION)) 170 171 #define _mm512_ipcvt_roundps_epu8(A, R) \ 172 ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ 173 (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ 174 (const int)R)) 175 176 #define _mm512_mask_ipcvt_roundps_epu8(W, U, A, R) \ 177 ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ 178 (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R)) 179 180 #define _mm512_maskz_ipcvt_roundps_epu8(U, A, R) \ 181 ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ 182 (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ 183 (const int)R)) 184 185 #define _mm512_ipcvttph_epi8(A) \ 186 ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ 187 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ 188 _MM_FROUND_CUR_DIRECTION)) 189 190 #define _mm512_mask_ipcvttph_epi8(W, U, A) \ 191 ((__m512i)__builtin_ia32_vcvttph2ibs512_mask((__v32hf)(__m512h)(A), \ 192 (__v32hu)(W), (__mmask32)(U), \ 193 _MM_FROUND_CUR_DIRECTION)) 194 195 #define _mm512_maskz_ipcvttph_epi8(U, A) \ 196 ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ 197 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ 198 _MM_FROUND_CUR_DIRECTION)) 199 200 #define _mm512_ipcvtt_roundph_epi8(A, S) \ 201 ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ 202 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ 203 S)) 204 205 #define _mm512_mask_ipcvtt_roundph_epi8(W, U, A, S) \ 206 ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ 207 (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S)) 208 209 #define _mm512_maskz_ipcvtt_roundph_epi8(U, A, S) \ 210 ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ 211 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ 212 S)) 213 214 #define _mm512_ipcvttph_epu8(A) \ 215 ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ 216 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ 217 _MM_FROUND_CUR_DIRECTION)) 218 219 #define _mm512_mask_ipcvttph_epu8(W, U, A) \ 220 ((__m512i)__builtin_ia32_vcvttph2iubs512_mask((__v32hf)(__m512h)(A), \ 221 (__v32hu)(W), (__mmask32)(U), \ 222 _MM_FROUND_CUR_DIRECTION)) 223 224 #define _mm512_maskz_ipcvttph_epu8(U, A) \ 225 ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ 226 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ 227 _MM_FROUND_CUR_DIRECTION)) 228 229 #define _mm512_ipcvtt_roundph_epu8(A, S) \ 230 ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ 231 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ 232 S)) 233 234 #define _mm512_mask_ipcvtt_roundph_epu8(W, U, A, S) \ 235 ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ 236 (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S)) 237 238 #define _mm512_maskz_ipcvtt_roundph_epu8(U, A, S) \ 239 ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ 240 (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ 241 S)) 242 243 #define _mm512_ipcvttps_epi8(A) \ 244 ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ 245 (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ 246 _MM_FROUND_CUR_DIRECTION)) 247 248 #define _mm512_mask_ipcvttps_epi8(W, U, A) \ 249 ((__m512i)__builtin_ia32_vcvttps2ibs512_mask((__v16sf)(__m512h)(A), \ 250 (__v16su)(W), (__mmask16)(U), \ 251 _MM_FROUND_CUR_DIRECTION)) 252 253 #define _mm512_maskz_ipcvttps_epi8(U, A) \ 254 ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ 255 (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ 256 _MM_FROUND_CUR_DIRECTION)) 257 258 #define _mm512_ipcvtt_roundps_epi8(A, S) \ 259 ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ 260 (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ 261 S)) 262 263 #define _mm512_mask_ipcvtt_roundps_epi8(W, U, A, S) \ 264 ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ 265 (__v16sf)(__m512h)(A), (__v16su)(W), (__mmask16)(U), S)) 266 267 #define _mm512_maskz_ipcvtt_roundps_epi8(U, A, S) \ 268 ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ 269 (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ 270 S)) 271 272 #define _mm512_ipcvttps_epu8(A) \ 273 ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ 274 (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ 275 _MM_FROUND_CUR_DIRECTION)) 276 277 #define _mm512_mask_ipcvttps_epu8(W, U, A) \ 278 ((__m512i)__builtin_ia32_vcvttps2iubs512_mask((__v16sf)(__m512h)(A), \ 279 (__v16su)(W), (__mmask16)(U), \ 280 _MM_FROUND_CUR_DIRECTION)) 281 282 #define _mm512_maskz_ipcvttps_epu8(U, A) \ 283 ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ 284 (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ 285 _MM_FROUND_CUR_DIRECTION)) 286 287 #define _mm512_ipcvtt_roundps_epu8(A, S) \ 288 ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ 289 (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ 290 S)) 291 292 #define _mm512_mask_ipcvtt_roundps_epu8(W, U, A, S) \ 293 ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ 294 (__v16sf)(__m512h)(A), (__v16su)(W), (__mmask16)(U), S)) 295 296 #define _mm512_maskz_ipcvtt_roundps_epu8(U, A, S) \ 297 ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ 298 (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ 299 S)) 300 301 #endif // __AVX10_2_512SATCVTINTRIN_H 302