1 // Copyright 2019 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #pragma once 7 8 #include <stddef.h> 9 #include <stdint.h> 10 11 #include <xnnpack/common.h> 12 #include <xnnpack/microparams.h> 13 14 #ifdef __cplusplus 15 extern "C" { 16 #endif 17 18 19 #define DECLARE_F16_VABS_UKERNEL_FUNCTION(fn_name) \ 20 XNN_INTERNAL void fn_name( \ 21 size_t n, \ 22 const void* x, \ 23 void* y, \ 24 const union xnn_f16_abs_params* params); 25 26 DECLARE_F16_VABS_UKERNEL_FUNCTION(xnn_f16_vabs_ukernel__neonfp16arith_x8) 27 DECLARE_F16_VABS_UKERNEL_FUNCTION(xnn_f16_vabs_ukernel__neonfp16arith_x16) 28 29 DECLARE_F16_VABS_UKERNEL_FUNCTION(xnn_f16_vabs_ukernel__sse2_x8) 30 DECLARE_F16_VABS_UKERNEL_FUNCTION(xnn_f16_vabs_ukernel__sse2_x16) 31 32 33 #define DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(fn_name) \ 34 XNN_INTERNAL void fn_name( \ 35 size_t n, \ 36 const void* x, \ 37 void* y, \ 38 const union xnn_f16_minmax_params* params); 39 40 DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(xnn_f16_vclamp_ukernel__neonfp16arith_x8) 41 DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(xnn_f16_vclamp_ukernel__neonfp16arith_x16) 42 43 DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(xnn_f16_vclamp_ukernel__f16c_x8) 44 DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(xnn_f16_vclamp_ukernel__f16c_x16) 45 46 47 #define DECLARE_F16_VELU_UKERNEL_FUNCTION(fn_name) \ 48 XNN_INTERNAL void fn_name( \ 49 size_t n, \ 50 const void* x, \ 51 void* y, \ 52 const union xnn_f16_elu_params* params); 53 54 DECLARE_F16_VELU_UKERNEL_FUNCTION(xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x8) 55 DECLARE_F16_VELU_UKERNEL_FUNCTION(xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16) 56 57 DECLARE_F16_VELU_UKERNEL_FUNCTION(xnn_f16_velu_ukernel__avx2_rr1_p3_x8) 58 DECLARE_F16_VELU_UKERNEL_FUNCTION(xnn_f16_velu_ukernel__avx2_rr1_p3_x16) 59 60 61 #define DECLARE_F16_VHSWISH_UKERNEL_FUNCTION(fn_name) \ 62 XNN_INTERNAL void fn_name( \ 63 size_t n, \ 64 const void* x, \ 65 void* y, \ 66 const union xnn_f16_hswish_params* params); 67 68 DECLARE_F16_VHSWISH_UKERNEL_FUNCTION(xnn_f16_vhswish_ukernel__neonfp16arith_x8) 69 DECLARE_F16_VHSWISH_UKERNEL_FUNCTION(xnn_f16_vhswish_ukernel__neonfp16arith_x16) 70 71 DECLARE_F16_VHSWISH_UKERNEL_FUNCTION(xnn_f16_vhswish_ukernel__f16c_x8) 72 DECLARE_F16_VHSWISH_UKERNEL_FUNCTION(xnn_f16_vhswish_ukernel__f16c_x16) 73 74 75 #define DECLARE_F16_VNEG_UKERNEL_FUNCTION(fn_name) \ 76 XNN_INTERNAL void fn_name( \ 77 size_t n, \ 78 const void* x, \ 79 void* y, \ 80 const union xnn_f16_neg_params* params); 81 82 83 DECLARE_F16_VNEG_UKERNEL_FUNCTION(xnn_f16_vneg_ukernel__neonfp16arith_x8) 84 DECLARE_F16_VNEG_UKERNEL_FUNCTION(xnn_f16_vneg_ukernel__neonfp16arith_x16) 85 86 DECLARE_F16_VNEG_UKERNEL_FUNCTION(xnn_f16_vneg_ukernel__sse2_x8) 87 DECLARE_F16_VNEG_UKERNEL_FUNCTION(xnn_f16_vneg_ukernel__sse2_x16) 88 89 90 #define DECLARE_F16_VRND_UKERNEL_FUNCTION(fn_name) \ 91 XNN_INTERNAL void fn_name( \ 92 size_t n, \ 93 const void* x, \ 94 void* y, \ 95 const union xnn_f16_rnd_params* params); 96 97 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndne_ukernel__f16c_x8) 98 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndne_ukernel__f16c_x16) 99 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndne_ukernel__neonfp16arith_x8) 100 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndne_ukernel__neonfp16arith_x16) 101 102 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndz_ukernel__f16c_x8) 103 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndz_ukernel__f16c_x16) 104 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndz_ukernel__neonfp16arith_x8) 105 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndz_ukernel__neonfp16arith_x16) 106 107 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndu_ukernel__f16c_x8) 108 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndu_ukernel__f16c_x16) 109 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndu_ukernel__neonfp16arith_x8) 110 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndu_ukernel__neonfp16arith_x16) 111 112 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndd_ukernel__f16c_x8) 113 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndd_ukernel__f16c_x16) 114 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndd_ukernel__neonfp16arith_x8) 115 DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndd_ukernel__neonfp16arith_x16) 116 117 118 #define DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(fn_name) \ 119 XNN_INTERNAL void fn_name( \ 120 size_t batch, \ 121 const void* input, \ 122 void* output, \ 123 const union xnn_f16_sigmoid_params* params); 124 125 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x8) 126 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x16) 127 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x24) 128 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x32) 129 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x40) 130 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x48) 131 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x56) 132 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x64) 133 134 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x8) 135 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x16) 136 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x24) 137 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x32) 138 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x40) 139 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x48) 140 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x56) 141 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x64) 142 143 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x8) 144 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x16) 145 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x24) 146 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x32) 147 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40) 148 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x48) 149 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x56) 150 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x64) 151 152 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x8) 153 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x16) 154 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x24) 155 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x32) 156 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x40) 157 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x48) 158 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x56) 159 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x64) 160 161 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x8) 162 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x16) 163 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x24) 164 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x32) 165 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x40) 166 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x48) 167 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x56) 168 DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x64) 169 170 171 #define DECLARE_F16_VSQR_UKERNEL_FUNCTION(fn_name) \ 172 XNN_INTERNAL void fn_name( \ 173 size_t n, \ 174 const void* x, \ 175 void* y, \ 176 const union xnn_f16_default_params* params); 177 178 DECLARE_F16_VSQR_UKERNEL_FUNCTION(xnn_f16_vsqr_ukernel__neonfp16arith_x8) 179 DECLARE_F16_VSQR_UKERNEL_FUNCTION(xnn_f16_vsqr_ukernel__neonfp16arith_x16) 180 181 DECLARE_F16_VSQR_UKERNEL_FUNCTION(xnn_f16_vsqr_ukernel__f16c_x8) 182 DECLARE_F16_VSQR_UKERNEL_FUNCTION(xnn_f16_vsqr_ukernel__f16c_x16) 183 184 185 #define DECLARE_F16_VSQRT_UKERNEL_FUNCTION(fn_name) \ 186 XNN_INTERNAL void fn_name( \ 187 size_t n, \ 188 const void* x, \ 189 void* y, \ 190 const union xnn_f16_sqrt_params* params); 191 192 DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__neonfp16arith_sqrt_x8) 193 DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__neonfp16arith_sqrt_x16) 194 195 DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__f16c_sqrt_x8) 196 DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__f16c_sqrt_x16) 197 198 199 #define DECLARE_F32_VABS_UKERNEL_FUNCTION(fn_name) \ 200 XNN_INTERNAL void fn_name( \ 201 size_t n, \ 202 const float* x, \ 203 float* y, \ 204 const union xnn_f32_abs_params* params); 205 206 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__neon_x4) 207 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__neon_x8) 208 209 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__sse_x4) 210 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__sse_x8) 211 212 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__avx_x8) 213 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__avx_x16) 214 215 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__avx512f_x16) 216 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__avx512f_x32) 217 218 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__wasmsimd_x4) 219 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__wasmsimd_x8) 220 221 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__scalar_x1) 222 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__scalar_x2) 223 DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__scalar_x4) 224 225 226 #define DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(fn_name) \ 227 XNN_INTERNAL void fn_name( \ 228 size_t n, \ 229 const float* x, \ 230 float* y, \ 231 const union xnn_f32_minmax_params* params); 232 233 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__avx_x8) 234 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__avx_x16) 235 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__avx512f_x16) 236 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__avx512f_x32) 237 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__neon_x4) 238 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__neon_x8) 239 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__scalar_x1) 240 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__scalar_x2) 241 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__scalar_x4) 242 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__sse_x4) 243 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__sse_x8) 244 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasm_x1) 245 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasm_x2) 246 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasm_x4) 247 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasmsimd_arm_x4) 248 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasmsimd_arm_x8) 249 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasmsimd_x86_x4) 250 DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasmsimd_x86_x8) 251 252 253 #define DECLARE_F32_VELU_UKERNEL_FUNCTION(fn_name) \ 254 XNN_INTERNAL void fn_name( \ 255 size_t n, \ 256 const float* x, \ 257 float* y, \ 258 const union xnn_f32_elu_params* params); 259 260 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4) 261 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8) 262 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12) 263 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16) 264 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20) 265 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24) 266 267 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_p6_x4) 268 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_p6_x8) 269 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_p6_x12) 270 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_p6_x16) 271 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_p6_x20) 272 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_p6_x24) 273 274 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4) 275 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8) 276 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12) 277 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16) 278 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20) 279 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24) 280 281 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4) 282 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8) 283 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12) 284 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16) 285 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20) 286 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24) 287 288 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4) 289 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8) 290 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12) 291 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16) 292 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20) 293 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24) 294 295 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_p6_x4) 296 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_p6_x8) 297 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_p6_x12) 298 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_p6_x16) 299 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_p6_x20) 300 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_p6_x24) 301 302 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4) 303 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8) 304 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12) 305 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16) 306 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20) 307 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24) 308 309 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_p6_x4) 310 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_p6_x8) 311 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_p6_x12) 312 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_p6_x16) 313 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_p6_x20) 314 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_p6_x24) 315 316 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8) 317 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16) 318 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24) 319 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32) 320 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40) 321 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48) 322 323 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8) 324 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16) 325 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24) 326 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32) 327 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40) 328 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48) 329 330 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_p6_x8) 331 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_p6_x16) 332 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_p6_x24) 333 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_p6_x32) 334 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_p6_x40) 335 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_p6_x48) 336 337 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8) 338 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16) 339 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24) 340 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32) 341 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40) 342 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48) 343 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56) 344 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64) 345 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72) 346 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80) 347 348 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8) 349 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16) 350 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24) 351 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32) 352 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40) 353 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48) 354 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56) 355 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64) 356 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72) 357 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80) 358 359 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8) 360 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16) 361 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24) 362 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32) 363 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40) 364 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48) 365 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56) 366 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64) 367 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72) 368 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80) 369 370 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_x8) 371 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_x16) 372 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_x24) 373 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_x32) 374 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_x40) 375 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_x48) 376 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_x56) 377 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_x64) 378 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_x72) 379 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_x80) 380 381 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16) 382 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32) 383 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48) 384 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64) 385 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80) 386 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96) 387 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112) 388 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128) 389 390 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16) 391 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32) 392 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48) 393 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64) 394 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80) 395 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96) 396 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112) 397 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128) 398 399 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4) 400 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8) 401 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12) 402 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16) 403 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20) 404 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24) 405 406 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4) 407 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8) 408 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12) 409 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16) 410 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20) 411 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24) 412 413 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4) 414 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8) 415 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12) 416 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16) 417 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20) 418 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24) 419 420 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4) 421 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8) 422 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12) 423 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16) 424 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20) 425 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24) 426 427 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1) 428 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2) 429 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3) 430 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4) 431 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5) 432 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6) 433 434 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_p6_x1) 435 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_p6_x2) 436 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_p6_x3) 437 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_p6_x4) 438 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_p6_x5) 439 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_p6_x6) 440 441 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1) 442 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2) 443 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3) 444 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4) 445 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5) 446 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6) 447 448 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_p6_x1) 449 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_p6_x2) 450 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_p6_x3) 451 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_p6_x4) 452 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_p6_x5) 453 DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_p6_x6) 454 455 456 #define DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(fn_name) \ 457 XNN_INTERNAL void fn_name( \ 458 size_t n, \ 459 const float* x, \ 460 float* y, \ 461 const union xnn_f32_hswish_params* params); 462 463 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__neon_x4) 464 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__neon_x8) 465 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__neon_x16) 466 467 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__sse_x4) 468 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__sse_x8) 469 470 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__avx_x8) 471 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__avx_x16) 472 473 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__fma3_x8) 474 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__fma3_x16) 475 476 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__avx512f_x16) 477 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__avx512f_x32) 478 479 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__wasmsimd_x4) 480 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__wasmsimd_x8) 481 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__wasmsimd_x16) 482 483 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__wasm_x1) 484 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__wasm_x2) 485 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__wasm_x4) 486 487 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__scalar_x1) 488 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__scalar_x2) 489 DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__scalar_x4) 490 491 492 #define DECLARE_F16_VLRELU_UKERNEL_FUNCTION(fn_name) \ 493 XNN_INTERNAL void fn_name( \ 494 size_t n, \ 495 const void* x, \ 496 void* y, \ 497 const union xnn_f16_lrelu_params* params); 498 499 DECLARE_F16_VLRELU_UKERNEL_FUNCTION(xnn_f16_vlrelu_ukernel__neonfp16arith_x8) 500 DECLARE_F16_VLRELU_UKERNEL_FUNCTION(xnn_f16_vlrelu_ukernel__neonfp16arith_x16) 501 502 DECLARE_F16_VLRELU_UKERNEL_FUNCTION(xnn_f16_vlrelu_ukernel__f16c_x8) 503 DECLARE_F16_VLRELU_UKERNEL_FUNCTION(xnn_f16_vlrelu_ukernel__f16c_x16) 504 505 506 #define DECLARE_F32_VLRELU_UKERNEL_FUNCTION(fn_name) \ 507 XNN_INTERNAL void fn_name( \ 508 size_t n, \ 509 const float* x, \ 510 float* y, \ 511 const union xnn_f32_lrelu_params* params); 512 513 514 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__neon_x4) 515 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__neon_x8) 516 517 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__sse_x4) 518 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__sse_x8) 519 520 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__sse2_x4) 521 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__sse2_x8) 522 523 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__sse41_x4) 524 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__sse41_x8) 525 526 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__avx_x8) 527 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__avx_x16) 528 529 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__avx512f_x16) 530 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__avx512f_x32) 531 532 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x4) 533 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8) 534 535 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x4) 536 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8) 537 538 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasm_x1) 539 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasm_x2) 540 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasm_x4) 541 542 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__scalar_x1) 543 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__scalar_x2) 544 DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__scalar_x4) 545 546 547 #define DECLARE_F32_VNEG_UKERNEL_FUNCTION(fn_name) \ 548 XNN_INTERNAL void fn_name( \ 549 size_t n, \ 550 const float* x, \ 551 float* y, \ 552 const union xnn_f32_neg_params* params); 553 554 555 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__neon_x4) 556 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__neon_x8) 557 558 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__sse_x4) 559 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__sse_x8) 560 561 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__avx_x8) 562 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__avx_x16) 563 564 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__avx512f_x16) 565 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__avx512f_x32) 566 567 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__wasmsimd_x4) 568 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__wasmsimd_x8) 569 570 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__scalar_x1) 571 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__scalar_x2) 572 DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__scalar_x4) 573 574 575 #define DECLARE_F32_VRELU_UKERNEL_FUNCTION(fn_name) \ 576 XNN_INTERNAL void fn_name( \ 577 size_t n, \ 578 const float* x, \ 579 float* y, \ 580 const union xnn_f32_relu_params* params); 581 582 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__avx_x8) 583 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__avx_x16) 584 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__avx512f_x16) 585 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__avx512f_x32) 586 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__neon_x4) 587 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__neon_x8) 588 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__scalar_x1) 589 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__scalar_x2) 590 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__scalar_x4) 591 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__scalar_x8) 592 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__sse_x4) 593 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__sse_x8) 594 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm_x1) 595 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm_x2) 596 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm_x4) 597 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm_x8) 598 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasmsimd_x4) 599 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasmsimd_x8) 600 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasmsimd_x16) 601 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm32_shr_x1) 602 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm32_shr_x2) 603 DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm32_shr_x4) 604 605 606 #define DECLARE_F32_VRND_UKERNEL_FUNCTION(fn_name) \ 607 XNN_INTERNAL void fn_name( \ 608 size_t n, \ 609 const float* x, \ 610 float* y, \ 611 const union xnn_f32_rnd_params* params); 612 613 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__avx_x8) 614 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__avx_x16) 615 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__avx512f_x16) 616 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__avx512f_x32) 617 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__neon_x4) 618 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__neon_x8) 619 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__neonv8_x4) 620 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__neonv8_x8) 621 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__scalar_libm_x1) 622 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__scalar_libm_x2) 623 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__scalar_libm_x4) 624 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__sse2_x4) 625 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__sse2_x8) 626 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__sse41_x4) 627 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__sse41_x8) 628 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__wasmsimd_x4) 629 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__wasmsimd_x8) 630 631 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__avx_x8) 632 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__avx_x16) 633 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__avx512f_x16) 634 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__avx512f_x32) 635 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__neon_x4) 636 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__neon_x8) 637 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__neonv8_x4) 638 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__neonv8_x8) 639 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__scalar_libm_x1) 640 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__scalar_libm_x2) 641 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__scalar_libm_x4) 642 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__sse2_x4) 643 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__sse2_x8) 644 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__sse41_x4) 645 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__sse41_x8) 646 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__wasmsimd_x4) 647 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__wasmsimd_x8) 648 649 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__avx_x8) 650 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__avx_x16) 651 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__avx512f_x16) 652 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__avx512f_x32) 653 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__neon_x4) 654 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__neon_x8) 655 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__neonv8_x4) 656 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__neonv8_x8) 657 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__scalar_libm_x1) 658 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__scalar_libm_x2) 659 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__scalar_libm_x4) 660 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__sse2_x4) 661 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__sse2_x8) 662 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__sse41_x4) 663 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__sse41_x8) 664 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__wasmsimd_x4) 665 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__wasmsimd_x8) 666 667 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__avx_x8) 668 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__avx_x16) 669 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__avx512f_x16) 670 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__avx512f_x32) 671 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__neon_x4) 672 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__neon_x8) 673 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__neonv8_x4) 674 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__neonv8_x8) 675 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__scalar_libm_x1) 676 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__scalar_libm_x2) 677 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__scalar_libm_x4) 678 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__sse2_x4) 679 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__sse2_x8) 680 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__sse41_x4) 681 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__sse41_x8) 682 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__wasmsimd_x4) 683 DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__wasmsimd_x8) 684 685 686 #define DECLARE_F32_VSQRT_UKERNEL_FUNCTION(fn_name) \ 687 XNN_INTERNAL void fn_name( \ 688 size_t n, \ 689 const float* x, \ 690 float* y, \ 691 const union xnn_f32_sqrt_params* params); 692 693 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neon_sqrt_x4) 694 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neon_sqrt_x8) 695 696 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4) 697 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8) 698 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12) 699 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16) 700 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20) 701 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24) 702 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28) 703 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32) 704 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36) 705 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40) 706 707 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4) 708 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8) 709 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12) 710 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16) 711 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20) 712 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24) 713 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28) 714 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32) 715 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36) 716 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40) 717 718 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__sse_sqrt_x4) 719 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__sse_sqrt_x8) 720 721 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx_sqrt_x8) 722 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx_sqrt_x16) 723 724 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8) 725 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16) 726 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24) 727 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32) 728 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40) 729 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48) 730 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56) 731 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64) 732 733 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16) 734 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32) 735 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48) 736 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64) 737 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80) 738 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96) 739 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112) 740 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128) 741 742 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4) 743 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8) 744 745 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__scalar_sqrt_x1) 746 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2) 747 DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4) 748 749 750 #define DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(fn_name) \ 751 XNN_INTERNAL void fn_name( \ 752 size_t n, \ 753 const float* x, \ 754 float* y, \ 755 const union xnn_f32_sigmoid_params* params); 756 757 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x4) 758 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x8) 759 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x12) 760 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x16) 761 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x20) 762 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x24) 763 764 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x4) 765 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8) 766 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12) 767 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16) 768 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20) 769 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24) 770 771 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x4) 772 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8) 773 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12) 774 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16) 775 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20) 776 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24) 777 778 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x4) 779 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8) 780 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12) 781 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16) 782 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20) 783 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24) 784 785 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x4) 786 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x8) 787 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x12) 788 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x16) 789 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20) 790 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x24) 791 792 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4) 793 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8) 794 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12) 795 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16) 796 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20) 797 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24) 798 799 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4) 800 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8) 801 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12) 802 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x16) 803 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20) 804 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24) 805 806 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4) 807 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8) 808 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12) 809 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x16) 810 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20) 811 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24) 812 813 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4) 814 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8) 815 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12) 816 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16) 817 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20) 818 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24) 819 820 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4) 821 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8) 822 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12) 823 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16) 824 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20) 825 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24) 826 827 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4) 828 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8) 829 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12) 830 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16) 831 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20) 832 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24) 833 834 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4) 835 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8) 836 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12) 837 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16) 838 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20) 839 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24) 840 841 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4) 842 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8) 843 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12) 844 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16) 845 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20) 846 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24) 847 848 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4) 849 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8) 850 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12) 851 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16) 852 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20) 853 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24) 854 855 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4) 856 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8) 857 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12) 858 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16) 859 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20) 860 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24) 861 862 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x4) 863 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8) 864 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12) 865 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x16) 866 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20) 867 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x24) 868 869 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x4) 870 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8) 871 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12) 872 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16) 873 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20) 874 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24) 875 876 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x4) 877 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8) 878 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12) 879 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16) 880 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20) 881 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24) 882 883 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4) 884 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8) 885 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12) 886 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16) 887 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20) 888 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24) 889 890 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x8) 891 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x16) 892 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x24) 893 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x32) 894 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40) 895 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x48) 896 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x56) 897 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x64) 898 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x72) 899 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x80) 900 901 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x8) 902 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x16) 903 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x24) 904 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x32) 905 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x40) 906 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x48) 907 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x56) 908 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x64) 909 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x72) 910 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x80) 911 912 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x8) 913 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x16) 914 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x24) 915 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x32) 916 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40) 917 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48) 918 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x56) 919 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x64) 920 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x72) 921 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x80) 922 923 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x8) 924 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x16) 925 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x24) 926 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x32) 927 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40) 928 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48) 929 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x56) 930 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x64) 931 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x72) 932 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x80) 933 934 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x8) 935 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x16) 936 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x24) 937 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32) 938 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40) 939 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x48) 940 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x56) 941 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x64) 942 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x72) 943 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x80) 944 945 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x16) 946 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x32) 947 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x48) 948 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64) 949 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80) 950 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96) 951 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112) 952 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128) 953 954 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x16) 955 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32) 956 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48) 957 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64) 958 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80) 959 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96) 960 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112) 961 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128) 962 963 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16) 964 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x32) 965 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x48) 966 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64) 967 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80) 968 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96) 969 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112) 970 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128) 971 972 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16) 973 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32) 974 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48) 975 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64) 976 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80) 977 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96) 978 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112) 979 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128) 980 981 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16) 982 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32) 983 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48) 984 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64) 985 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80) 986 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96) 987 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112) 988 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128) 989 990 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16) 991 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32) 992 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48) 993 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64) 994 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80) 995 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96) 996 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112) 997 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128) 998 999 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x4) 1000 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8) 1001 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12) 1002 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x16) 1003 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20) 1004 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24) 1005 1006 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4) 1007 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8) 1008 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12) 1009 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x16) 1010 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20) 1011 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x24) 1012 1013 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x1) 1014 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x2) 1015 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4) 1016 1017 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x1) 1018 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2) 1019 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4) 1020 1021 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x1) 1022 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x2) 1023 DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x4) 1024 1025 1026 #define DECLARE_F32_VSQR_UKERNEL_FUNCTION(fn_name) \ 1027 XNN_INTERNAL void fn_name( \ 1028 size_t n, \ 1029 const float* x, \ 1030 float* y, \ 1031 const union xnn_f32_default_params* params); 1032 1033 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__neon_x4) 1034 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__neon_x8) 1035 1036 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__sse_x4) 1037 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__sse_x8) 1038 1039 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__avx_x8) 1040 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__avx_x16) 1041 1042 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__avx512f_x16) 1043 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__avx512f_x32) 1044 1045 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__wasmsimd_x4) 1046 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__wasmsimd_x8) 1047 1048 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__scalar_x1) 1049 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__scalar_x2) 1050 DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__scalar_x4) 1051 1052 1053 #define DECLARE_S8_VCLAMP_UKERNEL_FUNCTION(fn_name) \ 1054 XNN_INTERNAL void fn_name( \ 1055 size_t n, \ 1056 const int8_t* x, \ 1057 int8_t* y, \ 1058 const union xnn_s8_minmax_params* params); 1059 1060 DECLARE_S8_VCLAMP_UKERNEL_FUNCTION(xnn_s8_vclamp_ukernel__neon_x64) 1061 DECLARE_S8_VCLAMP_UKERNEL_FUNCTION(xnn_s8_vclamp_ukernel__scalar_x4) 1062 DECLARE_S8_VCLAMP_UKERNEL_FUNCTION(xnn_s8_vclamp_ukernel__sse2_x64) 1063 DECLARE_S8_VCLAMP_UKERNEL_FUNCTION(xnn_s8_vclamp_ukernel__sse41_x64) 1064 DECLARE_S8_VCLAMP_UKERNEL_FUNCTION(xnn_s8_vclamp_ukernel__wasmsimd_x64) 1065 1066 1067 #define DECLARE_U8_VCLAMP_UKERNEL_FUNCTION(fn_name) \ 1068 XNN_INTERNAL void fn_name( \ 1069 size_t n, \ 1070 const uint8_t* x, \ 1071 uint8_t* y, \ 1072 const union xnn_u8_minmax_params* params); 1073 1074 DECLARE_U8_VCLAMP_UKERNEL_FUNCTION(xnn_u8_vclamp_ukernel__neon_x64) 1075 DECLARE_U8_VCLAMP_UKERNEL_FUNCTION(xnn_u8_vclamp_ukernel__scalar_x4) 1076 DECLARE_U8_VCLAMP_UKERNEL_FUNCTION(xnn_u8_vclamp_ukernel__sse2_x64) 1077 DECLARE_U8_VCLAMP_UKERNEL_FUNCTION(xnn_u8_vclamp_ukernel__wasmsimd_x64) 1078 1079 1080 #define DECLARE_U64_U32_VSQRTSHIFT_UKERNEL_FUNCTION(fn_name) \ 1081 XNN_INTERNAL void fn_name( \ 1082 size_t n, \ 1083 const uint64_t* x, \ 1084 uint32_t* y, \ 1085 uint32_t shift); 1086 1087 DECLARE_U64_U32_VSQRTSHIFT_UKERNEL_FUNCTION(xnn_u64_u32_vsqrtshift_ukernel__scalar_cvtu32_sqrt_cvtu32f64_x1) 1088 1089 1090 #define DECLARE_XX_VUNARY_UKERNEL_FUNCTION(fn_name) \ 1091 XNN_INTERNAL void fn_name( \ 1092 size_t size, \ 1093 const void* input, \ 1094 void* output, \ 1095 const void* params); 1096 1097 DECLARE_XX_VUNARY_UKERNEL_FUNCTION(xnn_xx_copy_ukernel__memcpy) 1098 1099 #ifdef __cplusplus 1100 } // extern "C" 1101 #endif 1102