/*===-------- avx10_2minmaxintrin.h - AVX10_2MINMAX intrinsics -------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error                                                                         \
    "Never use <avx10_2minmaxintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AVX10_2MINMAXINTRIN_H
#define __AVX10_2MINMAXINTRIN_H

17 #define _mm_minmaxne_pbh(A, B, C)                                              \
18   ((__m128bh)__builtin_ia32_vminmaxnepbf16128(                                 \
19       (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)))
20 
21 #define _mm_mask_minmaxne_pbh(W, U, A, B, C)                                   \
22   ((__m128bh)__builtin_ia32_selectpbf_128(                                     \
23       (__mmask8)(U),                                                           \
24       (__v8bf)_mm_minmaxne_pbh((__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B),   \
25                                (int)(C)),                                      \
26       (__v8bf)(W)))
27 
28 #define _mm_maskz_minmaxne_pbh(U, A, B, C)                                     \
29   ((__m128bh)__builtin_ia32_selectpbf_128(                                     \
30       (__mmask8)(U),                                                           \
31       (__v8bf)_mm_minmaxne_pbh((__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B),   \
32                                (int)(C)),                                      \
33       (__v8bf) __builtin_bit_cast(__m128bh, _mm_setzero_ps())))
34 
35 #define _mm256_minmaxne_pbh(A, B, C)                                           \
36   ((__m256bh)__builtin_ia32_vminmaxnepbf16256(                                 \
37       (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)))
38 
39 #define _mm256_mask_minmaxne_pbh(W, U, A, B, C)                                \
40   ((__m256bh)__builtin_ia32_selectpbf_256(                                     \
41       (__mmask16)(U),                                                          \
42       (__v16bf)_mm256_minmaxne_pbh((__m256bh)(__v16bf)(A),                     \
43                                    (__m256bh)(__v16bf)(B), (int)(C)),          \
44       (__v16bf)(W)))
45 
46 #define _mm256_maskz_minmaxne_pbh(U, A, B, C)                                  \
47   ((__m256bh)__builtin_ia32_selectpbf_256(                                     \
48       (__mmask16)(U),                                                          \
49       (__v16bf)_mm256_minmaxne_pbh((__m256bh)(__v16bf)(A),                     \
50                                    (__m256bh)(__v16bf)(B), (int)(C)),          \
51       (__v16bf) __builtin_bit_cast(__m256bh, _mm256_setzero_ps())))
52 
53 #define _mm_minmax_pd(A, B, C)                                                 \
54   ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
55       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
56       (__v2df)_mm_setzero_pd(), (__mmask8)-1))
57 
58 #define _mm_mask_minmax_pd(W, U, A, B, C)                                      \
59   ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
60       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
61       (__v2df)(__m128d)(W), (__mmask8)(U)))
62 
63 #define _mm_maskz_minmax_pd(U, A, B, C)                                        \
64   ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
65       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
66       (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
67 
68 #define _mm256_minmax_pd(A, B, C)                                              \
69   ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
70       (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
71       (__v4df)_mm256_setzero_pd(), (__mmask8)-1, _MM_FROUND_NO_EXC))
72 
73 #define _mm256_mask_minmax_pd(W, U, A, B, C)                                   \
74   ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
75       (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
76       (__v4df)(__m256d)(W), (__mmask8)(U), _MM_FROUND_NO_EXC))
77 
78 #define _mm256_maskz_minmax_pd(U, A, B, C)                                     \
79   ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
80       (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
81       (__v4df)_mm256_setzero_pd(), (__mmask8)(U), _MM_FROUND_NO_EXC))
82 
83 #define _mm256_minmax_round_pd(A, B, C, R)                                     \
84   ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
85       (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
86       (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R)))
87 
88 #define _mm256_mask_minmax_round_pd(W, U, A, B, C, R)                          \
89   ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
90       (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
91       (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R)))
92 
93 #define _mm256_maskz_minmax_round_pd(U, A, B, C, R)                            \
94   ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
95       (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
96       (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R)))
97 
98 #define _mm_minmax_ph(A, B, C)                                                 \
99   ((__m128h)__builtin_ia32_vminmaxph128_mask(                                  \
100       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
101       (__v8hf)_mm_setzero_ph(), (__mmask8)-1))
102 
103 #define _mm_mask_minmax_ph(W, U, A, B, C)                                      \
104   ((__m128h)__builtin_ia32_vminmaxph128_mask(                                  \
105       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
106       (__v8hf)(__m128h)(W), (__mmask16)-1))
107 
108 #define _mm_maskz_minmax_ph(U, A, B, C)                                        \
109   ((__m128h)__builtin_ia32_vminmaxph128_mask(                                  \
110       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
111       (__v8hf)_mm_setzero_ph(), (__mmask8)(U)))
112 
113 #define _mm256_minmax_ph(A, B, C)                                              \
114   ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
115       (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
116       (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, _MM_FROUND_NO_EXC))
117 
118 #define _mm256_mask_minmax_ph(W, U, A, B, C)                                   \
119   ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
120       (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
121       (__v16hf)(__m256h)(W), (__mmask16)(U), _MM_FROUND_NO_EXC))
122 
123 #define _mm256_maskz_minmax_ph(U, A, B, C)                                     \
124   ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
125       (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
126       (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), _MM_FROUND_NO_EXC))
127 
128 #define _mm256_minmax_round_ph(A, B, C, R)                                     \
129   ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
130       (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
131       (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R)))
132 
133 #define _mm256_mask_minmax_round_ph(W, U, A, B, C, R)                          \
134   ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
135       (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (C),                       \
136       (__v16hf)(__m256h)(W), (__mmask16)(U), (int)(R)))
137 
138 #define _mm256_maskz_minmax_round_ph(U, A, B, C, R)                            \
139   ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
140       (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
141       (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
142 
143 #define _mm_minmax_ps(A, B, C)                                                 \
144   ((__m128)__builtin_ia32_vminmaxps128_mask(                                   \
145       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
146       (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
147 
148 #define _mm_mask_minmax_ps(W, U, A, B, C)                                      \
149   ((__m128)__builtin_ia32_vminmaxps128_mask(                                   \
150       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \
151       (__mmask8)(U)))
152 
153 #define _mm_maskz_minmax_ps(U, A, B, C)                                        \
154   ((__m128)__builtin_ia32_vminmaxps128_mask(                                   \
155       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
156       (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
157 
158 #define _mm256_minmax_ps(A, B, C)                                              \
159   ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
160       (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C),                      \
161       (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, _MM_FROUND_NO_EXC))
162 
163 #define _mm256_mask_minmax_ps(W, U, A, B, C)                                   \
164   ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
165       (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \
166       (__mmask8)(U), _MM_FROUND_NO_EXC))
167 
168 #define _mm256_maskz_minmax_ps(U, A, B, C)                                     \
169   ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
170       (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C),                      \
171       (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), _MM_FROUND_NO_EXC))
172 
173 #define _mm256_minmax_round_ps(A, B, C, R)                                     \
174   ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
175       (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C),                      \
176       (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, (int)(R)))
177 
178 #define _mm256_mask_minmax_round_ps(W, U, A, B, C, R)                          \
179   ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
180       (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \
181       (__mmask8)(U), (int)(R)))
182 
183 #define _mm256_maskz_minmax_round_ps(U, A, B, C, R)                            \
184   ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
185       (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C),                      \
186       (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R)))
187 
188 #define _mm_minmax_sd(A, B, C)                                                 \
189   ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
190       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
191       (__v2df)_mm_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
192 
193 #define _mm_mask_minmax_sd(W, U, A, B, C)                                      \
194   ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
195       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
196       (__v2df)(__m128d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
197 
198 #define _mm_maskz_minmax_sd(U, A, B, C)                                        \
199   ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
200       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
201       (__v2df)_mm_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
202 
203 #define _mm_minmax_round_sd(A, B, C, R)                                        \
204   ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
205       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
206       (__v2df)_mm_undefined_pd(), (__mmask8)-1, (int)(R)))
207 
208 #define _mm_mask_minmax_round_sd(W, U, A, B, C, R)                             \
209   ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
210       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
211       (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
212 
213 #define _mm_maskz_minmax_round_sd(U, A, B, C, R)                               \
214   ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
215       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
216       (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
217 
218 #define _mm_minmax_sh(A, B, C)                                                 \
219   ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
220       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
221       (__v8hf)_mm_undefined_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
222 
223 #define _mm_mask_minmax_sh(W, U, A, B, C)                                      \
224   ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
225       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
226       (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
227 
228 #define _mm_maskz_minmax_sh(U, A, B, C)                                        \
229   ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
230       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
231       (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
232 
233 #define _mm_minmax_round_sh(A, B, C, R)                                        \
234   ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
235       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
236       (__v8hf)_mm_undefined_ph(), (__mmask8)-1, (int)(R)))
237 
238 #define _mm_mask_minmax_round_sh(W, U, A, B, C, R)                             \
239   ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
240       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
241       (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R)))
242 
243 #define _mm_maskz_minmax_round_sh(U, A, B, C, R)                               \
244   ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
245       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
246       (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
247 
248 #define _mm_minmax_ss(A, B, C)                                                 \
249   ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
250       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
251       (__v4sf)_mm_undefined_ps(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
252 
253 #define _mm_mask_minmax_ss(W, U, A, B, C)                                      \
254   ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
255       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(W),         \
256       (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
257 
258 #define _mm_maskz_minmax_ss(U, A, B, C)                                        \
259   ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
260       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
261       (__v4sf)_mm_setzero_ps(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
262 
263 #define _mm_minmax_round_ss(A, B, C, R)                                        \
264   ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
265       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
266       (__v4sf)_mm_undefined_ps(), (__mmask8)-1, (int)(R)))
267 
268 #define _mm_mask_minmax_round_ss(W, U, A, B, C, R)                             \
269   ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
270       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(W),         \
271       (__mmask8)(U), (int)(R)))
272 
273 #define _mm_maskz_minmax_round_ss(U, A, B, C, R)                               \
274   ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
275       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
276       (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
#endif // __AVX10_2MINMAXINTRIN_H