/*===-------- avx10_2minmaxintrin.h - AVX10_2MINMAX intrinsics -------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error                                                                         \
    "Never use <avx10_2minmaxintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AVX10_2MINMAXINTRIN_H
#define __AVX10_2MINMAXINTRIN_H

#define _mm_minmaxne_pbh(A, B, C)                                              \
  ((__m128bh)__builtin_ia32_vminmaxnepbf16128(                                 \
      (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)))

#define _mm_mask_minmaxne_pbh(W, U, A, B, C)                                   \
  ((__m128bh)__builtin_ia32_selectpbf_128(                                     \
      (__mmask8)(U),                                                           \
      (__v8bf)_mm_minmaxne_pbh((__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B),   \
                               (int)(C)),                                      \
      (__v8bf)(W)))

#define _mm_maskz_minmaxne_pbh(U, A, B, C)                                     \
  ((__m128bh)__builtin_ia32_selectpbf_128(                                     \
      (__mmask8)(U),                                                           \
      (__v8bf)_mm_minmaxne_pbh((__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B),   \
                               (int)(C)),                                      \
      (__v8bf) __builtin_bit_cast(__m128bh, _mm_setzero_ps())))

#define _mm256_minmaxne_pbh(A, B, C)                                           \
  ((__m256bh)__builtin_ia32_vminmaxnepbf16256(                                 \
      (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)))

#define _mm256_mask_minmaxne_pbh(W, U, A, B, C)                                \
  ((__m256bh)__builtin_ia32_selectpbf_256(                                     \
      (__mmask16)(U),                                                          \
      (__v16bf)_mm256_minmaxne_pbh((__m256bh)(__v16bf)(A),                     \
                                   (__m256bh)(__v16bf)(B), (int)(C)),          \
      (__v16bf)(W)))

#define _mm256_maskz_minmaxne_pbh(U, A, B, C)                                  \
  ((__m256bh)__builtin_ia32_selectpbf_256(                                     \
      (__mmask16)(U),                                                          \
      (__v16bf)_mm256_minmaxne_pbh((__m256bh)(__v16bf)(A),                     \
                                   (__m256bh)(__v16bf)(B), (int)(C)),          \
      (__v16bf) __builtin_bit_cast(__m256bh, _mm256_setzero_ps())))

#define _mm_minmax_pd(A, B, C)                                                 \
  ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm_mask_minmax_pd(W, U, A, B, C)                                      \
  ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
      (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_minmax_pd(U, A, B, C)                                        \
  ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)))

#define _mm256_minmax_pd(A, B, C)                                              \
  ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1, _MM_FROUND_NO_EXC))

#define _mm256_mask_minmax_pd(W, U, A, B, C)                                   \
  ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
      (__v4df)(__m256d)(W), (__mmask8)(U), _MM_FROUND_NO_EXC))

#define _mm256_maskz_minmax_pd(U, A, B, C)                                     \
  ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U), _MM_FROUND_NO_EXC))
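/* The *_round_* variants below take an extra control operand R (e.g.
 * _MM_FROUND_NO_EXC or _MM_FROUND_CUR_DIRECTION). Since min/max results are
 * exact, R is best understood as an SAE-style exception-suppression control
 * rather than a rounding mode; the non-round 256-bit forms above pass
 * _MM_FROUND_NO_EXC for it. Consult the Intel AVX10.2 ISA reference for the
 * precise semantics. */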
#define _mm256_minmax_round_pd(A, B, C, R)                                     \
  ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
      (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R)))

#define _mm256_mask_minmax_round_pd(W, U, A, B, C, R)                          \
  ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
      (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R)))

#define _mm256_maskz_minmax_round_pd(U, A, B, C, R)                            \
  ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R)))

#define _mm_minmax_ph(A, B, C)                                                 \
  ((__m128h)__builtin_ia32_vminmaxph128_mask(                                  \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
      (__v8hf)_mm_setzero_ph(), (__mmask8)-1))

#define _mm_mask_minmax_ph(W, U, A, B, C)                                      \
  ((__m128h)__builtin_ia32_vminmaxph128_mask(                                  \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
      (__v8hf)(__m128h)(W), (__mmask8)(U)))

#define _mm_maskz_minmax_ph(U, A, B, C)                                        \
  ((__m128h)__builtin_ia32_vminmaxph128_mask(                                  \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
      (__v8hf)_mm_setzero_ph(), (__mmask8)(U)))

#define _mm256_minmax_ph(A, B, C)                                              \
  ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
      (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, _MM_FROUND_NO_EXC))

#define _mm256_mask_minmax_ph(W, U, A, B, C)                                   \
  ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
      (__v16hf)(__m256h)(W), (__mmask16)(U), _MM_FROUND_NO_EXC))

#define _mm256_maskz_minmax_ph(U, A, B, C)                                     \
  ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
      (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), _MM_FROUND_NO_EXC))

#define _mm256_minmax_round_ph(A, B, C, R)                                     \
  ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
      (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R)))

#define _mm256_mask_minmax_round_ph(W, U, A, B, C, R)                          \
  ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
      (__v16hf)(__m256h)(W), (__mmask16)(U), (int)(R)))

#define _mm256_maskz_minmax_round_ph(U, A, B, C, R)                            \
  ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
      (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))

#define _mm_minmax_ps(A, B, C)                                                 \
  ((__m128)__builtin_ia32_vminmaxps128_mask(                                   \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_minmax_ps(W, U, A, B, C)                                      \
  ((__m128)__builtin_ia32_vminmaxps128_mask(                                   \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm_maskz_minmax_ps(U, A, B, C)                                        \
  ((__m128)__builtin_ia32_vminmaxps128_mask(                                   \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))

#define _mm256_minmax_ps(A, B, C)                                              \
  ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C),                      \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, _MM_FROUND_NO_EXC))

#define _mm256_mask_minmax_ps(W, U, A, B, C)                                   \
  ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \
      (__mmask8)(U), _MM_FROUND_NO_EXC))

#define _mm256_maskz_minmax_ps(U, A, B, C)                                     \
  ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C),                      \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), _MM_FROUND_NO_EXC))
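/* Illustrative usage sketch, with assumptions flagged: the immediate C
 * selects which IEEE 754-2019 minimum/maximum operation VMINMAX performs;
 * the 0 used below is a placeholder, not a documented encoding. Look up the
 * real imm8 values in the Intel AVX10.2 ISA reference.
 *
 *   __m256 r  = _mm256_minmax_ps(a, b, 0);            // all 8 lanes
 *   __m256 rm = _mm256_mask_minmax_ps(w, u, a, b, 0); // lane i: op result if
 *                                                     // bit i of u is set,
 *                                                     // else taken from w
 *   __m256 rz = _mm256_maskz_minmax_ps(u, a, b, 0);   // lane i: op result if
 *                                                     // bit i of u is set,
 *                                                     // else zeroed
 */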
#define _mm256_minmax_round_ps(A, B, C, R)                                     \
  ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C),                      \
      (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, (int)(R)))

#define _mm256_mask_minmax_round_ps(W, U, A, B, C, R)                          \
  ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm256_maskz_minmax_round_ps(U, A, B, C, R)                            \
  ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C),                      \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R)))

#define _mm_minmax_sd(A, B, C)                                                 \
  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
      (__v2df)_mm_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_minmax_sd(W, U, A, B, C)                                      \
  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
      (__v2df)(__m128d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_minmax_sd(U, A, B, C)                                        \
  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_minmax_round_sd(A, B, C, R)                                        \
  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
      (__v2df)_mm_undefined_pd(), (__mmask8)-1, (int)(R)))

#define _mm_mask_minmax_round_sd(W, U, A, B, C, R)                             \
  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_minmax_round_sd(U, A, B, C, R)                               \
  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))

#define _mm_minmax_sh(A, B, C)                                                 \
  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
      (__v8hf)_mm_undefined_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_minmax_sh(W, U, A, B, C)                                      \
  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
      (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_minmax_sh(U, A, B, C)                                        \
  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
      (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_minmax_round_sh(A, B, C, R)                                        \
  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
      (__v8hf)_mm_undefined_ph(), (__mmask8)-1, (int)(R)))

#define _mm_mask_minmax_round_sh(W, U, A, B, C, R)                             \
  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
      (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_minmax_round_sh(U, A, B, C, R)                               \
  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
      (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
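/* The scalar _sd/_sh forms above and the _ss forms below operate on element 0
 * only. Following the usual convention for scalar AVX-512-style intrinsics,
 * the upper elements of the result are expected to be copied from the first
 * source operand A; verify against the VMINMAXSD/SH/SS documentation. */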
#define _mm_minmax_ss(A, B, C)                                                 \
  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
      (__v4sf)_mm_undefined_ps(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_minmax_ss(W, U, A, B, C)                                      \
  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
      (__v4sf)(__m128)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_minmax_ss(U, A, B, C)                                        \
  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_minmax_round_ss(A, B, C, R)                                        \
  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
      (__v4sf)_mm_undefined_ps(), (__mmask8)-1, (int)(R)))

#define _mm_mask_minmax_round_ss(W, U, A, B, C, R)                             \
  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_minmax_round_ss(U, A, B, C, R)                               \
  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
#endif // __AVX10_2MINMAXINTRIN_H