// Copyright (C) 2020 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Source-list variables. Each variable below is a plain list of C source
// paths relative to this file; the modules that consume them are not visible
// in this part of the file.

// Operator-level sources: every entry sits directly under src/.
OPERATOR_SRCS = [
    "src/add-nc.c",
    "src/argmax-pooling-nhwc.c",
    "src/average-pooling-nhwc.c",
    "src/binary-elementwise-nd.c",
    "src/channel-pad-nc.c",
    "src/channel-shuffle-nc.c",
    "src/clamp-nc.c",
    "src/convolution-nchw.c",
    "src/convolution-nhwc.c",
    "src/deconvolution-nhwc.c",
    "src/fully-connected-nc.c",
    "src/global-average-pooling-ncw.c",
    "src/global-average-pooling-nwc.c",
    "src/hardswish-nc.c",
    "src/leaky-relu-nc.c",
    "src/max-pooling-nhwc.c",
    "src/prelu-nc.c",
    "src/resize-bilinear-nhwc.c",
    "src/sigmoid-nc.c",
    "src/softmax-nc.c",
    "src/unpooling-nhwc.c",
]

// Sources under src/tables/.
TABLE_SRCS = [
    "src/tables/exp2-k-over-64.c",
    "src/tables/exp2-k-over-2048.c",
]

// Sources whose file names carry the "scalar" tag.
SCALAR_UKERNELS = [
    "src/f32-argmaxpool/4x-scalar-c1.c",
    "src/f32-argmaxpool/9p8x-scalar-c1.c",
    "src/f32-argmaxpool/9x-scalar-c1.c",
    "src/f32-avgpool/mp9p8q-scalar.c",
    "src/f32-avgpool/up9-scalar.c",
    "src/f32-bilinear/gen/scalar-c1.c",
    "src/f32-bilinear/gen/scalar-c2.c",
    "src/f32-bilinear/gen/scalar-c4.c",
    "src/f32-clamp/scalar.c",
    "src/f32-conv-hwc/3x3s2p0p1c3x4-scalar-1x1.c",
    "src/f32-conv-hwc/3x3s2p1c3x4-scalar-1x1.c",
    "src/f32-conv-hwc2spchw/3x3s2p1c3x4-scalar-1x1.c",
    "src/f32-dwconv-spchw/3x3p1-scalar.c",
    "src/f32-dwconv-spchw/3x3s2p1-scalar.c",
    "src/f32-dwconv-spchw/5x5p2-scalar.c",
    "src/f32-dwconv-spchw/5x5s2p2-scalar.c",
    "src/f32-dwconv/gen/up1x25-scalar-acc2.c",
    "src/f32-dwconv/gen/up1x25-scalar.c",
    "src/f32-dwconv/gen/up1x4-scalar-acc2.c",
    "src/f32-dwconv/gen/up1x4-scalar.c",
    "src/f32-dwconv/gen/up1x9-scalar-acc2.c",
    "src/f32-dwconv/gen/up1x9-scalar.c",
    "src/f32-dwconv/gen/up2x25-scalar-acc2.c",
    "src/f32-dwconv/gen/up2x25-scalar.c",
    "src/f32-dwconv/gen/up2x4-scalar-acc2.c",
    "src/f32-dwconv/gen/up2x4-scalar.c",
    "src/f32-dwconv/gen/up2x9-scalar-acc2.c",
    "src/f32-dwconv/gen/up2x9-scalar.c",
    "src/f32-gavgpool-spchw/scalar-x1.c",
    "src/f32-gavgpool/mp7p7q-scalar.c",
    "src/f32-gavgpool/up7-scalar.c",
    "src/f32-gemm/gen-inc/1x4-scalar.c",
    "src/f32-gemm/gen-inc/2x4-scalar.c",
    "src/f32-gemm/gen-inc/4x4-scalar.c",
    "src/f32-gemm/gen/1x4-scalar.c",
    "src/f32-gemm/gen/2x4-scalar.c",
    "src/f32-gemm/gen/4x2-scalar.c",
    "src/f32-gemm/gen/4x4-scalar.c",
    "src/f32-hswish/gen/scalar-x1.c",
    "src/f32-hswish/gen/scalar-x2.c",
    "src/f32-hswish/gen/scalar-x4.c",
    "src/f32-igemm/gen/1x4-scalar.c",
    "src/f32-igemm/gen/2x4-scalar.c",
    "src/f32-igemm/gen/4x2-scalar.c",
    "src/f32-igemm/gen/4x4-scalar.c",
    "src/f32-maxpool/9p8x-scalar-c1.c",
    "src/f32-pavgpool/mp9p8q-scalar.c",
    "src/f32-pavgpool/up9-scalar.c",
    "src/f32-ppmm/gen/2x4-scalar.c",
    "src/f32-ppmm/gen/3x3-scalar.c",
    "src/f32-ppmm/gen/4x2-scalar.c",
    "src/f32-ppmm/gen/4x4-scalar.c",
    "src/f32-prelu/gen/scalar-2x1.c",
    "src/f32-prelu/gen/scalar-2x4.c",
    "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x1.c",
    "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x2.c",
    "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x2-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x4.c",
    "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x4-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x4-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/scalar-p5-x1.c",
    "src/f32-raddstoreexpminusmax/gen/scalar-p5-x2.c",
    "src/f32-raddstoreexpminusmax/gen/scalar-p5-x2-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4.c",
    "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4-acc4.c",
    "src/f32-rmax/scalar.c",
    "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x1.c",
    "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x2.c",
    "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x4.c",
    "src/f32-sigmoid/gen/scalar-lut64-p2-div-x1.c",
    "src/f32-sigmoid/gen/scalar-lut64-p2-div-x2.c",
    "src/f32-sigmoid/gen/scalar-lut64-p2-div-x4.c",
    "src/f32-sigmoid/gen/scalar-p5-div-x1.c",
    "src/f32-sigmoid/gen/scalar-p5-div-x2.c",
    "src/f32-sigmoid/gen/scalar-p5-div-x4.c",
    "src/f32-spmm/gen/1x1-scalar-pipelined.c",
    "src/f32-spmm/gen/1x1-scalar.c",
    "src/f32-spmm/gen/2x1-scalar-pipelined.c",
    "src/f32-spmm/gen/2x1-scalar.c",
    "src/f32-spmm/gen/4x1-scalar-pipelined.c",
    "src/f32-spmm/gen/4x1-scalar.c",
    "src/f32-spmm/gen/8x1-scalar-pipelined.c",
    "src/f32-spmm/gen/8x1-scalar.c",
    "src/f32-spmm/gen/8x2-scalar.c",
    "src/f32-spmm/gen/8x4-scalar.c",
    "src/f32-vbinary/gen/vadd-scalar-x1.c",
    "src/f32-vbinary/gen/vadd-scalar-x2.c",
    "src/f32-vbinary/gen/vadd-scalar-x4.c",
    "src/f32-vbinary/gen/vaddc-scalar-x1.c",
    "src/f32-vbinary/gen/vaddc-scalar-x2.c",
    "src/f32-vbinary/gen/vaddc-scalar-x4.c",
    "src/f32-vbinary/gen/vdiv-scalar-x1.c",
    "src/f32-vbinary/gen/vdiv-scalar-x2.c",
    "src/f32-vbinary/gen/vdiv-scalar-x4.c",
    "src/f32-vbinary/gen/vdivc-scalar-x1.c",
    "src/f32-vbinary/gen/vdivc-scalar-x2.c",
    "src/f32-vbinary/gen/vdivc-scalar-x4.c",
    "src/f32-vbinary/gen/vmax-scalar-x1.c",
    "src/f32-vbinary/gen/vmax-scalar-x2.c",
    "src/f32-vbinary/gen/vmax-scalar-x4.c",
    "src/f32-vbinary/gen/vmaxc-scalar-x1.c",
    "src/f32-vbinary/gen/vmaxc-scalar-x2.c",
    "src/f32-vbinary/gen/vmaxc-scalar-x4.c",
    "src/f32-vbinary/gen/vmin-scalar-x1.c",
    "src/f32-vbinary/gen/vmin-scalar-x2.c",
    "src/f32-vbinary/gen/vmin-scalar-x4.c",
    "src/f32-vbinary/gen/vminc-scalar-x1.c",
    "src/f32-vbinary/gen/vminc-scalar-x2.c",
    "src/f32-vbinary/gen/vminc-scalar-x4.c",
    "src/f32-vbinary/gen/vmul-scalar-x1.c",
    "src/f32-vbinary/gen/vmul-scalar-x2.c",
    "src/f32-vbinary/gen/vmul-scalar-x4.c",
    "src/f32-vbinary/gen/vmulc-scalar-x1.c",
    "src/f32-vbinary/gen/vmulc-scalar-x2.c",
    "src/f32-vbinary/gen/vmulc-scalar-x4.c",
    "src/f32-vbinary/gen/vrdivc-scalar-x1.c",
    "src/f32-vbinary/gen/vrdivc-scalar-x2.c",
    "src/f32-vbinary/gen/vrdivc-scalar-x4.c",
    "src/f32-vbinary/gen/vrsubc-scalar-x1.c",
    "src/f32-vbinary/gen/vrsubc-scalar-x2.c",
    "src/f32-vbinary/gen/vrsubc-scalar-x4.c",
    "src/f32-vbinary/gen/vsub-scalar-x1.c",
    "src/f32-vbinary/gen/vsub-scalar-x2.c",
    "src/f32-vbinary/gen/vsub-scalar-x4.c",
    "src/f32-vbinary/gen/vsubc-scalar-x1.c",
    "src/f32-vbinary/gen/vsubc-scalar-x2.c",
    "src/f32-vbinary/gen/vsubc-scalar-x4.c",
    "src/f32-vmulcaddc/gen/c1-scalar-2x.c",
    "src/f32-vmulcaddc/gen/c2-scalar-2x.c",
    "src/f32-vmulcaddc/gen/c4-scalar-2x.c",
    "src/math/expminus-scalar-lut2048-p1.c",
    "src/math/expminus-scalar-lut64-p2.c",
    "src/math/expminus-scalar-p5.c",
    "src/math/sigmoid-scalar-lut2048-p1-div.c",
    "src/math/sigmoid-scalar-lut64-p2-div.c",
    "src/math/sigmoid-scalar-p5-div.c",
    "src/q8-avgpool/mp9p8q-scalar.c",
    "src/q8-avgpool/up9-scalar.c",
    "src/q8-dwconv/up1x9-scalar.c",
    "src/q8-gavgpool/mp7p7q-scalar.c",
    "src/q8-gavgpool/up7-scalar.c",
    "src/q8-gemm/2x2-scalar.c",
    "src/q8-igemm/2x2-scalar.c",
    "src/q8-vadd/scalar.c",
    "src/u8-clamp/scalar.c",
    "src/u8-lut32norm/scalar.c",
    "src/u8-maxpool/9p8x-scalar-c1.c",
    "src/u8-rmax/scalar.c",
    "src/x32-packx/x2-scalar.c",
    "src/x32-packx/x3-scalar.c",
    "src/x32-packx/x4-scalar.c",
    "src/x32-pad/x2-scalar.c",
    "src/x32-unpool/scalar.c",
    "src/x32-zip/x2-scalar.c",
    "src/x32-zip/x3-scalar.c",
    "src/x32-zip/x4-scalar.c",
    "src/x32-zip/xm-scalar.c",
    "src/x8-lut/scalar.c",
    "src/x8-zip/x2-scalar.c",
    "src/x8-zip/x3-scalar.c",
    "src/x8-zip/x4-scalar.c",
    "src/x8-zip/xm-scalar.c",
]

// Sources whose file names carry the "psimd" tag.
// NOTE(review): the FASTMATH / ACCMATH split presumably maps to different
// floating-point compiler flags in the consuming modules -- confirm there.
PSIMD_FASTMATH_UKERNELS = [
    "src/f32-argmaxpool/4x-psimd-c4.c",
    "src/f32-argmaxpool/9p8x-psimd-c4.c",
    "src/f32-argmaxpool/9x-psimd-c4.c",
    "src/f32-avgpool/mp9p8q-psimd.c",
    "src/f32-avgpool/up9-psimd.c",
    "src/f32-bilinear/gen/psimd-c4.c",
    "src/f32-bilinear/gen/psimd-c8.c",
    "src/f32-clamp/psimd.c",
    "src/f32-dwconv/gen/up4x25-psimd-acc2.c",
    "src/f32-dwconv/gen/up4x25-psimd.c",
    "src/f32-dwconv/gen/up4x4-psimd-acc2.c",
    "src/f32-dwconv/gen/up4x4-psimd.c",
    "src/f32-dwconv/gen/up4x9-psimd-acc2.c",
    "src/f32-dwconv/gen/up4x9-psimd.c",
    "src/f32-dwconv/gen/up8x25-psimd-acc2.c",
    "src/f32-dwconv/gen/up8x25-psimd.c",
    "src/f32-dwconv/gen/up8x4-psimd-acc2.c",
    "src/f32-dwconv/gen/up8x4-psimd.c",
    "src/f32-dwconv/gen/up8x9-psimd-acc2.c",
    "src/f32-dwconv/gen/up8x9-psimd.c",
    "src/f32-gavgpool/mp7p7q-psimd.c",
    "src/f32-gavgpool/up7-psimd.c",
    "src/f32-gemm/gen/1x8-psimd-loadsplat.c",
    "src/f32-gemm/gen/1x8-psimd-splat.c",
    "src/f32-gemm/gen/1x8s4-psimd.c",
    "src/f32-gemm/gen/4x8-psimd-loadsplat.c",
    "src/f32-gemm/gen/4x8-psimd-splat.c",
    "src/f32-gemm/gen/4x8s4-psimd.c",
    "src/f32-gemm/gen/6x8-psimd-loadsplat.c",
    "src/f32-gemm/gen/6x8-psimd-splat.c",
    "src/f32-gemm/gen/6x8s4-psimd.c",
    "src/f32-gemm/gen-inc/1x8-psimd-loadsplat.c",
    "src/f32-gemm/gen-inc/1x8-psimd-splat.c",
    "src/f32-gemm/gen-inc/1x8s4-psimd.c",
    "src/f32-gemm/gen-inc/4x8-psimd-loadsplat.c",
    "src/f32-gemm/gen-inc/4x8-psimd-splat.c",
    "src/f32-gemm/gen-inc/4x8s4-psimd.c",
    "src/f32-gemm/gen-inc/6x8-psimd-loadsplat.c",
    "src/f32-gemm/gen-inc/6x8-psimd-splat.c",
    "src/f32-gemm/gen-inc/6x8s4-psimd.c",
    "src/f32-hswish/gen/psimd-x4.c",
    "src/f32-hswish/gen/psimd-x8.c",
    "src/f32-igemm/gen/1x8-psimd-loadsplat.c",
    "src/f32-igemm/gen/1x8-psimd-splat.c",
    "src/f32-igemm/gen/1x8s4-psimd.c",
    "src/f32-igemm/gen/4x2c4-psimd.c",
    "src/f32-igemm/gen/4x8-psimd-loadsplat.c",
    "src/f32-igemm/gen/4x8-psimd-splat.c",
    "src/f32-igemm/gen/4x8s4-psimd.c",
    "src/f32-igemm/gen/6x8-psimd-loadsplat.c",
    "src/f32-igemm/gen/6x8-psimd-splat.c",
    "src/f32-igemm/gen/6x8s4-psimd.c",
    "src/f32-maxpool/9p8x-psimd-c4.c",
    "src/f32-pavgpool/mp9p8q-psimd.c",
    "src/f32-pavgpool/up9-psimd.c",
    "src/f32-ppmm/gen/4x8-psimd.c",
    "src/f32-prelu/gen/psimd-2x4.c",
    "src/f32-prelu/gen/psimd-2x8.c",
    "src/f32-rmax/psimd.c",
    "src/f32-vbinary/gen/vadd-psimd-x4.c",
    "src/f32-vbinary/gen/vadd-psimd-x8.c",
    "src/f32-vbinary/gen/vaddc-psimd-x4.c",
    "src/f32-vbinary/gen/vaddc-psimd-x8.c",
    "src/f32-vbinary/gen/vdiv-psimd-x4.c",
    "src/f32-vbinary/gen/vdiv-psimd-x8.c",
    "src/f32-vbinary/gen/vdivc-psimd-x4.c",
    "src/f32-vbinary/gen/vdivc-psimd-x8.c",
    "src/f32-vbinary/gen/vmax-psimd-x4.c",
    "src/f32-vbinary/gen/vmax-psimd-x8.c",
    "src/f32-vbinary/gen/vmaxc-psimd-x4.c",
    "src/f32-vbinary/gen/vmaxc-psimd-x8.c",
    "src/f32-vbinary/gen/vmin-psimd-x4.c",
    "src/f32-vbinary/gen/vmin-psimd-x8.c",
    "src/f32-vbinary/gen/vminc-psimd-x4.c",
    "src/f32-vbinary/gen/vminc-psimd-x8.c",
    "src/f32-vbinary/gen/vmul-psimd-x4.c",
    "src/f32-vbinary/gen/vmul-psimd-x8.c",
    "src/f32-vbinary/gen/vmulc-psimd-x4.c",
    "src/f32-vbinary/gen/vmulc-psimd-x8.c",
    "src/f32-vbinary/gen/vrdivc-psimd-x4.c",
    "src/f32-vbinary/gen/vrdivc-psimd-x8.c",
    "src/f32-vbinary/gen/vrsubc-psimd-x4.c",
    "src/f32-vbinary/gen/vrsubc-psimd-x8.c",
    "src/f32-vbinary/gen/vsub-psimd-x4.c",
    "src/f32-vbinary/gen/vsub-psimd-x8.c",
    "src/f32-vbinary/gen/vsubc-psimd-x4.c",
    "src/f32-vbinary/gen/vsubc-psimd-x8.c",
    "src/f32-vmulcaddc/gen/c4-psimd-2x.c",
    "src/f32-vmulcaddc/gen/c8-psimd-2x.c",
    "src/x32-packx/x4-psimd.c",
    "src/x32-pad/x2-psimd.c",
    "src/x32-unpool/psimd.c",
    "src/x32-zip/x2-psimd.c",
    "src/x32-zip/x3-psimd.c",
    "src/x32-zip/x4-psimd.c",
    "src/x32-zip/xm-psimd.c",
]

// "psimd"-tagged exp-minus-max, sigmoid and math sources, kept in a list
// separate from PSIMD_FASTMATH_UKERNELS.
PSIMD_ACCMATH_UKERNELS = [
    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x4.c",
    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x8.c",
    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x8-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x12.c",
    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x12-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x12-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x16.c",
    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x16-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x16-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x20.c",
    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x20-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x20-acc5.c",
    "src/f32-sigmoid/gen/psimd-p5-div-x4.c",
    "src/f32-sigmoid/gen/psimd-p5-div-x8.c",
    "src/f32-sigmoid/gen/psimd-p5-div-x12.c",
    "src/f32-sigmoid/gen/psimd-p5-div-x16.c",
    "src/f32-sigmoid/gen/psimd-p5-div-x20.c",
    "src/f32-sigmoid/gen/psimd-p5-div-x24.c",
    "src/math/sigmoid-psimd-p5-div.c",
]

// ISA-specific micro-kernels

// Sources whose file names carry the "neon" tag.
NEON_UKERNELS = [
    "src/f32-avgpool/mp9p8q-neon.c",
    "src/f32-avgpool/up9-neon.c",
    "src/f32-bilinear/gen/neon-c4.c",
    "src/f32-bilinear/gen/neon-c8.c",
    "src/f32-clamp/neon.c",
    "src/f32-dwconv/gen/up4x9-neon.c",
    "src/f32-dwconv/gen/up4x9-neon-acc2.c",
    "src/f32-dwconv/gen/up8x9-neon.c",
    "src/f32-dwconv/gen/up8x9-neon-acc2.c",
    "src/f32-gavgpool-spchw/neon-x4.c",
    "src/f32-gavgpool/mp7p7q-neon.c",
    "src/f32-gavgpool/up7-neon.c",
    "src/f32-gemm/gen/1x8-neon-lane-ld64.c",
    "src/f32-gemm/gen/4x2-neon-lane-ld64.c",
    "src/f32-gemm/gen/4x8-neon-lane-ld128.c",
    "src/f32-gemm/gen/4x8-neon-lane-ld64.c",
    "src/f32-gemm/gen/5x8-neon-lane-ld64.c",
    "src/f32-gemm/gen/6x8-neon-lane-ld64.c",
    "src/f32-gemm/gen/6x8-neon-lane-ld128.c",
    "src/f32-gemm/gen/1x8-neon-dup-ld64.c",
    "src/f32-gemm/gen/4x8-neon-dup-ld128.c",
    "src/f32-gemm/gen/4x8-neon-dup-ld64.c",
    "src/f32-gemm/gen/6x8-neon-dup-ld64.c",
    "src/f32-gemm/gen/6x8-neon-dup-ld128.c",
    "src/f32-gemm/gen/1x8s4-neon.c",
    "src/f32-gemm/gen/4x8s4-neon.c",
    "src/f32-gemm/gen/6x8s4-neon.c",
    "src/f32-gemm/gen/8x8s4-neon.c",
    "src/f32-gemm/gen-inc/1x8-neon-lane-ld64.c",
    "src/f32-gemm/gen-inc/4x8-neon-lane-ld128.c",
    "src/f32-gemm/gen-inc/4x8-neon-lane-ld64.c",
    "src/f32-gemm/gen-inc/5x8-neon-lane-ld64.c",
    "src/f32-gemm/gen-inc/6x8-neon-lane-ld64.c",
    "src/f32-gemm/gen-inc/6x8-neon-lane-ld128.c",
    "src/f32-gemm/gen-inc/1x8-neon-dup-ld64.c",
    "src/f32-gemm/gen-inc/4x8-neon-dup-ld128.c",
    "src/f32-gemm/gen-inc/4x8-neon-dup-ld64.c",
    "src/f32-gemm/gen-inc/6x8-neon-dup-ld64.c",
    "src/f32-gemm/gen-inc/6x8-neon-dup-ld128.c",
    "src/f32-gemm/gen-inc/1x8s4-neon.c",
    "src/f32-gemm/gen-inc/4x8s4-neon.c",
    "src/f32-gemm/gen-inc/6x8s4-neon.c",
    "src/f32-gemm/gen-inc/8x8s4-neon.c",
    "src/f32-hswish/gen/neon-x4.c",
    "src/f32-hswish/gen/neon-x8.c",
    "src/f32-igemm/gen/1x8-neon-lane-ld64.c",
    "src/f32-igemm/gen/4x2-neon-lane-ld64.c",
    "src/f32-igemm/gen/4x4-neon-lane-ld64.c",
    "src/f32-igemm/gen/4x8-neon-lane-ld128.c",
    "src/f32-igemm/gen/4x8-neon-lane-ld64.c",
    "src/f32-igemm/gen/6x8-neon-lane-ld64.c",
    "src/f32-igemm/gen/6x8-neon-lane-ld128.c",
    "src/f32-igemm/gen/1x8-neon-dup-ld64.c",
    "src/f32-igemm/gen/4x8-neon-dup-ld128.c",
    "src/f32-igemm/gen/4x8-neon-dup-ld64.c",
    "src/f32-igemm/gen/6x8-neon-dup-ld64.c",
    "src/f32-igemm/gen/6x8-neon-dup-ld128.c",
    "src/f32-igemm/gen/1x8s4-neon.c",
    "src/f32-igemm/gen/4x8s4-neon.c",
    "src/f32-igemm/gen/6x8s4-neon.c",
    "src/f32-igemm/gen/8x8s4-neon.c",
    "src/f32-pavgpool/mp9p8q-neon.c",
    "src/f32-pavgpool/up9-neon.c",
    "src/f32-ppmm/gen/4x8-neon.c",
    "src/f32-ppmm/gen/8x8-neon.c",
    "src/f32-prelu/gen/neon-2x4.c",
    "src/f32-prelu/gen/neon-2x8.c",
    "src/f32-raddstoreexpminusmax/gen/neon-p5-x4.c",
    "src/f32-raddstoreexpminusmax/gen/neon-p5-x8.c",
    "src/f32-raddstoreexpminusmax/gen/neon-p5-x8-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neon-p5-x12.c",
    "src/f32-raddstoreexpminusmax/gen/neon-p5-x12-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neon-p5-x12-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/neon-p5-x16.c",
    "src/f32-raddstoreexpminusmax/gen/neon-p5-x16-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neon-p5-x16-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/neon-p5-x20.c",
    "src/f32-raddstoreexpminusmax/gen/neon-p5-x20-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neon-p5-x20-acc5.c",
    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x4.c",
    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x8.c",
    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x8-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x12.c",
    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x12-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x12-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x16.c",
    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x16-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x16-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20.c",
    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20-acc5.c",
    "src/f32-rmax/neon.c",
    "src/f32-sigmoid/gen/neon-frac-p9-p10-nr1recps-x16.c",
    "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x4.c",
    "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x8.c",
    "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x12.c",
    "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x16.c",
    "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x20.c",
    "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x24.c",
    "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x4.c",
    "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x8.c",
    "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x12.c",
    "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x16.c",
    "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x20.c",
    "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x24.c",
    "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x4.c",
    "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x8.c",
    "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x12.c",
    "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x16.c",
    "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x20.c",
    "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x24.c",
    "src/f32-vbinary/gen/vadd-neon-x4.c",
    "src/f32-vbinary/gen/vadd-neon-x8.c",
    "src/f32-vbinary/gen/vaddc-neon-x4.c",
    "src/f32-vbinary/gen/vaddc-neon-x8.c",
    "src/f32-vbinary/gen/vmax-neon-x4.c",
    "src/f32-vbinary/gen/vmax-neon-x8.c",
    "src/f32-vbinary/gen/vmaxc-neon-x4.c",
    "src/f32-vbinary/gen/vmaxc-neon-x8.c",
    "src/f32-vbinary/gen/vmin-neon-x4.c",
    "src/f32-vbinary/gen/vmin-neon-x8.c",
    "src/f32-vbinary/gen/vminc-neon-x4.c",
    "src/f32-vbinary/gen/vminc-neon-x8.c",
    "src/f32-vbinary/gen/vmul-neon-x4.c",
    "src/f32-vbinary/gen/vmul-neon-x8.c",
    "src/f32-vbinary/gen/vmulc-neon-x4.c",
    "src/f32-vbinary/gen/vmulc-neon-x8.c",
    "src/f32-vbinary/gen/vrsubc-neon-x4.c",
    "src/f32-vbinary/gen/vrsubc-neon-x8.c",
    "src/f32-vbinary/gen/vsub-neon-x4.c",
    "src/f32-vbinary/gen/vsub-neon-x8.c",
    "src/f32-vbinary/gen/vsubc-neon-x4.c",
    "src/f32-vbinary/gen/vsubc-neon-x8.c",
    "src/f32-vmulcaddc/gen/c4-neon-2x.c",
    "src/f32-vmulcaddc/gen/c8-neon-2x.c",
    "src/q8-avgpool/mp9p8q-neon.c",
    "src/q8-avgpool/up9-neon.c",
    "src/q8-dwconv/up8x9-neon.c",
    "src/q8-gavgpool/mp7p7q-neon.c",
    "src/q8-gavgpool/up7-neon.c",
    "src/q8-gemm/4x8-neon.c",
    "src/q8-gemm/8x8-neon.c",
    "src/q8-igemm/4x8-neon.c",
    "src/q8-igemm/8x8-neon.c",
    "src/q8-vadd/neon.c",
    "src/u8-clamp/neon.c",
    "src/u8-maxpool/9p8x-neon-c16.c",
    "src/u8-rmax/neon.c",
    "src/x32-packx/x4-neon-st4.c",
    "src/x32-pad/x2-neon.c",
    "src/x32-zip/x2-neon.c",
    "src/x32-zip/x3-neon.c",
    "src/x32-zip/x4-neon.c",
    "src/x32-zip/xm-neon.c",
    "src/x8-zip/x2-neon.c",
    "src/x8-zip/x3-neon.c",
    "src/x8-zip/x4-neon.c",
    "src/x8-zip/xm-neon.c",
    "src/math/sigmoid-neon-frac-p9-p10-nr1recps.c",
    "src/math/sigmoid-neon-rr1-lut2048-p1-nr2recps.c",
    "src/math/sigmoid-neon-rr1-lut64-p2-nr2recps.c",
    "src/math/sigmoid-neon-rr1-p5-nr2recps.c",
    "src/math/sigmoid-neon-rr2-lut2048-p1-nr2recps.c",
    "src/math/sigmoid-neon-rr2-lut64-p2-nr2recps.c",
    "src/math/sigmoid-neon-rr2-p5-nr2recps.c",
]

// Sources whose file names carry the "neonfma" tag.
NEONFMA_UKERNELS = [
    "src/f32-bilinear/gen/neonfma-c4.c",
    "src/f32-bilinear/gen/neonfma-c8.c",
    "src/f32-igemm/gen/1x8-neonfma-dup-ld64.c",
    "src/f32-igemm/gen/4x8-neonfma-dup-ld128.c",
    "src/f32-igemm/gen/4x8-neonfma-dup-ld64.c",
    "src/f32-igemm/gen/6x8-neonfma-dup-ld64.c",
    "src/f32-igemm/gen/6x8-neonfma-dup-ld128.c",
    "src/f32-igemm/gen/1x8s4-neonfma.c",
    "src/f32-igemm/gen/4x8s4-neonfma.c",
    "src/f32-igemm/gen/6x8s4-neonfma.c",
    "src/f32-igemm/gen/8x8s4-neonfma.c",
    "src/f32-dwconv/gen/up4x9-neonfma.c",
    "src/f32-dwconv/gen/up4x9-neonfma-acc2.c",
    "src/f32-dwconv/gen/up8x9-neonfma.c",
    "src/f32-dwconv/gen/up8x9-neonfma-acc2.c",
    "src/f32-gemm/gen/1x8-neonfma-dup-ld64.c",
    "src/f32-gemm/gen/4x8-neonfma-dup-ld128.c",
    "src/f32-gemm/gen/4x8-neonfma-dup-ld64.c",
    "src/f32-gemm/gen/6x8-neonfma-dup-ld64.c",
    "src/f32-gemm/gen/6x8-neonfma-dup-ld128.c",
    "src/f32-gemm/gen/1x8s4-neonfma.c",
    "src/f32-gemm/gen/4x8s4-neonfma.c",
    "src/f32-gemm/gen/6x8s4-neonfma.c",
    "src/f32-gemm/gen/8x8s4-neonfma.c",
    "src/f32-gemm/gen-inc/1x8-neonfma-dup-ld64.c",
    "src/f32-gemm/gen-inc/4x8-neonfma-dup-ld128.c",
    "src/f32-gemm/gen-inc/4x8-neonfma-dup-ld64.c",
    "src/f32-gemm/gen-inc/6x8-neonfma-dup-ld64.c",
    "src/f32-gemm/gen-inc/6x8-neonfma-dup-ld128.c",
    "src/f32-gemm/gen-inc/1x8s4-neonfma.c",
    "src/f32-gemm/gen-inc/4x8s4-neonfma.c",
    "src/f32-gemm/gen-inc/6x8s4-neonfma.c",
    "src/f32-gemm/gen-inc/8x8s4-neonfma.c",
    "src/f32-hswish/gen/neonfma-x4.c",
    "src/f32-hswish/gen/neonfma-x8.c",
    "src/f32-ppmm/gen/4x8-neonfma.c",
    "src/f32-ppmm/gen/8x8-neonfma.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x4.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x8.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x8-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x12.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x12-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x12-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x16.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x16-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x16-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x20.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x20-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x20-acc5.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x4.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x8.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x8-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x12.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x12-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x12-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x16.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x16-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x16-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x20.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x20-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x20-acc5.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x24.c",
    "src/f32-vmulcaddc/gen/c4-neonfma-2x.c",
    "src/f32-vmulcaddc/gen/c8-neonfma-2x.c",
    "src/math/exp-neonfma-lut64-p2.c",
    "src/math/exp-neonfma-p5.c",
    "src/math/expminus-neonfma-lut2048-p1.c",
    "src/math/expminus-neonfma-lut64-p2.c",
    "src/math/expminus-neonfma-p5.c",
    "src/math/sigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma.c",
    "src/math/sigmoid-neonfma-rr1-lut2048-p1-nr2fma.c",
    "src/math/sigmoid-neonfma-rr1-lut2048-p1-nr2recps.c",
    "src/math/sigmoid-neonfma-rr1-lut64-p2-nr1recps1fma.c",
    "src/math/sigmoid-neonfma-rr1-lut64-p2-nr2fma.c",
    "src/math/sigmoid-neonfma-rr1-lut64-p2-nr2recps.c",
    "src/math/sigmoid-neonfma-rr1-p5-nr1recps1fma.c",
    "src/math/sigmoid-neonfma-rr1-p5-nr2fma.c",
    "src/math/sigmoid-neonfma-rr1-p5-nr2recps.c",
    "src/math/sigmoid-neonfma-rr2-lut2048-p1-nr1recps1fma.c",
    "src/math/sigmoid-neonfma-rr2-lut2048-p1-nr2fma.c",
    "src/math/sigmoid-neonfma-rr2-lut2048-p1-nr2recps.c",
    "src/math/sigmoid-neonfma-rr2-lut64-p2-nr1recps1fma.c",
    "src/math/sigmoid-neonfma-rr2-lut64-p2-nr2fma.c",
    "src/math/sigmoid-neonfma-rr2-lut64-p2-nr2recps.c",
    "src/math/sigmoid-neonfma-rr2-p5-nr1recps1fma.c",
    "src/math/sigmoid-neonfma-rr2-p5-nr2fma.c",
    "src/math/sigmoid-neonfma-rr2-p5-nr2recps.c",
]

// "neonfma"-tagged sources kept apart from NEONFMA_UKERNELS.
// NOTE(review): the first six entries are "-neon-" (not "-neonfma-") vdiv /
// vdivc / vrdivc sources; presumably they live here because they need
// AArch64-only instructions -- confirm before moving them.
AARCH64_NEONFMA_UKERNELS = [
    "src/f32-vbinary/gen/vdiv-neon-x4.c",
    "src/f32-vbinary/gen/vdiv-neon-x8.c",
    "src/f32-vbinary/gen/vdivc-neon-x4.c",
    "src/f32-vbinary/gen/vdivc-neon-x8.c",
    "src/f32-vbinary/gen/vrdivc-neon-x4.c",
    "src/f32-vbinary/gen/vrdivc-neon-x8.c",
    "src/f32-gemm/gen/1x8-neonfma-lane-ld64.c",
    "src/f32-gemm/gen/4x2-neonfma-lane-ld64.c",
    "src/f32-gemm/gen/4x8-neonfma-lane-ld128.c",
    "src/f32-gemm/gen/4x8-neonfma-lane-ld64.c",
    "src/f32-gemm/gen/5x8-neonfma-lane-ld64.c",
    "src/f32-gemm/gen/6x8-neonfma-lane-ld64.c",
    "src/f32-gemm/gen/6x8-neonfma-lane-ld128.c",
    "src/f32-gemm/gen-inc/1x8-neonfma-lane-ld64.c",
    "src/f32-gemm/gen-inc/4x8-neonfma-lane-ld128.c",
    "src/f32-gemm/gen-inc/4x8-neonfma-lane-ld64.c",
    "src/f32-gemm/gen-inc/5x8-neonfma-lane-ld64.c",
    "src/f32-gemm/gen-inc/6x8-neonfma-lane-ld64.c",
    "src/f32-gemm/gen-inc/6x8-neonfma-lane-ld128.c",
    "src/f32-igemm/gen/1x8-neonfma-lane-ld64.c",
    "src/f32-igemm/gen/4x2-neonfma-lane-ld64.c",
    "src/f32-igemm/gen/4x4-neonfma-lane-ld64.c",
    "src/f32-igemm/gen/4x8-neonfma-lane-ld128.c",
    "src/f32-igemm/gen/4x8-neonfma-lane-ld64.c",
    "src/f32-igemm/gen/6x8-neonfma-lane-ld64.c",
    "src/f32-igemm/gen/6x8-neonfma-lane-ld128.c",
    "src/f32-conv-hwc/3x3s2p1c3x4-neonfma-2x2.c",
    "src/f32-conv-hwc/3x3s2p1c3x8-neonfma-2x2.c",
    "src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c",
    "src/f32-dwconv-spchw/3x3p1-neonfma.c",
    "src/f32-dwconv-spchw/5x5p2-neonfma.c",
    "src/f32-dwconv-spchw/3x3s2p1-neonfma.c",
    "src/f32-dwconv-spchw/5x5s2p2-neonfma.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x24.c",
    "src/f32-spmm/gen/12x1-neonfma.c",
    "src/f32-spmm/gen/12x2-neonfma.c",
    "src/f32-spmm/gen/12x4-neonfma.c",
    "src/f32-spmm/gen/16x1-neonfma-pipelined.c",
    "src/f32-spmm/gen/16x1-neonfma-unroll2.c",
    "src/f32-spmm/gen/16x1-neonfma.c",
    "src/f32-spmm/gen/16x2-neonfma.c",
    "src/f32-spmm/gen/16x4-neonfma.c",
    "src/f32-spmm/gen/4x1-neonfma-pipelined.c",
    "src/f32-spmm/gen/4x1-neonfma-unroll2.c",
    "src/f32-spmm/gen/4x1-neonfma.c",
    "src/f32-spmm/gen/4x2-neonfma.c",
    "src/f32-spmm/gen/4x4-neonfma.c",
    "src/f32-spmm/gen/8x1-neonfma-pipelined.c",
    "src/f32-spmm/gen/8x1-neonfma-unroll2.c",
    "src/f32-spmm/gen/8x1-neonfma.c",
    "src/f32-spmm/gen/8x2-neonfma.c",
    "src/f32-spmm/gen/8x4-neonfma.c",
    "src/math/sigmoid-neonfma-rr1-lut2048-p1-div.c",
    "src/math/sigmoid-neonfma-rr1-lut64-p2-div.c",
    "src/math/sigmoid-neonfma-rr1-p5-div.c",
    "src/math/sigmoid-neonfma-rr2-lut2048-p1-div.c",
    "src/math/sigmoid-neonfma-rr2-lut64-p2-div.c",
    "src/math/sigmoid-neonfma-rr2-p5-div.c",
]

// f16 GEMM sources whose file names carry the "neonfp16arith" tag.
AARCH64_NEONFP16ARITH_UKERNELS = [
    "src/f16-gemm/gen/4x8-neonfp16arith-ld64.c",
    "src/f16-gemm/gen/6x8-neonfp16arith-ld64.c",
    "src/f16-gemm/gen/8x8-neonfp16arith-ld64.c",
]

// Sources whose file names carry the "sse" tag.
SSE_UKERNELS = [
    "src/f32-avgpool/mp9p8q-sse.c",
    "src/f32-avgpool/up9-sse.c",
    "src/f32-bilinear/gen/sse-c4.c",
    "src/f32-bilinear/gen/sse-c8.c",
    "src/f32-clamp/sse.c",
    "src/f32-dwconv-spchw/3x3p1-sse.c",
    "src/f32-dwconv-spchw/3x3s2p1-sse.c",
    "src/f32-dwconv/gen/up4x25-sse-acc2.c",
    "src/f32-dwconv/gen/up4x25-sse.c",
    "src/f32-dwconv/gen/up4x4-sse-acc2.c",
    "src/f32-dwconv/gen/up4x4-sse.c",
    "src/f32-dwconv/gen/up4x9-sse-acc2.c",
    "src/f32-dwconv/gen/up4x9-sse.c",
    "src/f32-dwconv/gen/up8x25-sse-acc2.c",
    "src/f32-dwconv/gen/up8x25-sse.c",
    "src/f32-dwconv/gen/up8x4-sse-acc2.c",
    "src/f32-dwconv/gen/up8x4-sse.c",
    "src/f32-dwconv/gen/up8x9-sse-acc2.c",
    "src/f32-dwconv/gen/up8x9-sse.c",
    "src/f32-gavgpool-spchw/sse-x4.c",
    "src/f32-gavgpool/mp7p7q-sse.c",
    "src/f32-gavgpool/up7-sse.c",
    "src/f32-gemm/gen/1x8-sse-dup.c",
    "src/f32-gemm/gen/1x8-sse-load1.c",
    "src/f32-gemm/gen/1x8s4-sse.c",
    "src/f32-gemm/gen/4x8-sse-dup.c",
    "src/f32-gemm/gen/4x8-sse-load1.c",
    "src/f32-gemm/gen/4x8s4-sse.c",
    "src/f32-gemm/gen-inc/1x8-sse-dup.c",
    "src/f32-gemm/gen-inc/1x8-sse-load1.c",
    "src/f32-gemm/gen-inc/1x8s4-sse.c",
    "src/f32-gemm/gen-inc/4x8-sse-dup.c",
    "src/f32-gemm/gen-inc/4x8-sse-load1.c",
    "src/f32-gemm/gen-inc/4x8s4-sse.c",
    "src/f32-hswish/gen/sse-x4.c",
    "src/f32-hswish/gen/sse-x8.c",
    "src/f32-igemm/gen/1x8-sse-dup.c",
    "src/f32-igemm/gen/1x8-sse-load1.c",
    "src/f32-igemm/gen/1x8s4-sse.c",
    "src/f32-igemm/gen/4x2c4-sse.c",
    "src/f32-igemm/gen/4x8-sse-dup.c",
    "src/f32-igemm/gen/4x8-sse-load1.c",
    "src/f32-igemm/gen/4x8s4-sse.c",
    "src/f32-maxpool/9p8x-sse-c4.c",
    "src/f32-pavgpool/mp9p8q-sse.c",
    "src/f32-pavgpool/up9-sse.c",
    "src/f32-ppmm/gen/4x8-sse.c",
    "src/f32-rmax/sse.c",
    "src/f32-spmm/gen/4x1-sse.c",
    "src/f32-spmm/gen/8x1-sse.c",
    "src/f32-vbinary/gen/vadd-sse-x4.c",
    "src/f32-vbinary/gen/vadd-sse-x8.c",
    "src/f32-vbinary/gen/vaddc-sse-x4.c",
    "src/f32-vbinary/gen/vaddc-sse-x8.c",
    "src/f32-vbinary/gen/vdiv-sse-x4.c",
    "src/f32-vbinary/gen/vdiv-sse-x8.c",
    "src/f32-vbinary/gen/vdivc-sse-x4.c",
    "src/f32-vbinary/gen/vdivc-sse-x8.c",
    "src/f32-vbinary/gen/vmax-sse-x4.c",
    "src/f32-vbinary/gen/vmax-sse-x8.c",
    "src/f32-vbinary/gen/vmaxc-sse-x4.c",
    "src/f32-vbinary/gen/vmaxc-sse-x8.c",
    "src/f32-vbinary/gen/vmin-sse-x4.c",
    "src/f32-vbinary/gen/vmin-sse-x8.c",
    "src/f32-vbinary/gen/vminc-sse-x4.c",
    "src/f32-vbinary/gen/vminc-sse-x8.c",
    "src/f32-vbinary/gen/vmul-sse-x4.c",
    "src/f32-vbinary/gen/vmul-sse-x8.c",
    "src/f32-vbinary/gen/vmulc-sse-x4.c",
    "src/f32-vbinary/gen/vmulc-sse-x8.c",
    "src/f32-vbinary/gen/vrdivc-sse-x4.c",
    "src/f32-vbinary/gen/vrdivc-sse-x8.c",
    "src/f32-vbinary/gen/vrsubc-sse-x4.c",
    "src/f32-vbinary/gen/vrsubc-sse-x8.c",
    "src/f32-vbinary/gen/vsub-sse-x4.c",
    "src/f32-vbinary/gen/vsub-sse-x8.c",
    "src/f32-vbinary/gen/vsubc-sse-x4.c",
    "src/f32-vbinary/gen/vsubc-sse-x8.c",
    "src/f32-vmulcaddc/gen/c4-sse-2x.c",
    "src/f32-vmulcaddc/gen/c8-sse-2x.c",
    "src/x32-packx/x4-sse.c",
]

SSE2_UKERNELS = [
    "src/f32-argmaxpool/9p8x-sse2-c4.c",
    "src/f32-argmaxpool/4x-sse2-c4.c",
    "src/f32-argmaxpool/9x-sse2-c4.c",
    "src/f32-prelu/gen/sse2-2x4.c",
    "src/f32-prelu/gen/sse2-2x8.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x4.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x8.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x8-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x12.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x12-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x12-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x16.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x16-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x16-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x20.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x20-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x20-acc5.c",
    "src/f32-sigmoid/gen/sse2-p5-div-x4.c",
    "src/f32-sigmoid/gen/sse2-p5-div-x8.c",
    "src/f32-sigmoid/gen/sse2-p5-div-x12.c",
    "src/f32-sigmoid/gen/sse2-p5-div-x16.c",
    "src/f32-sigmoid/gen/sse2-p5-div-x20.c",
    "src/f32-sigmoid/gen/sse2-p5-div-x24.c",
    "src/q8-avgpool/mp9p8q-sse2.c",
    "src/q8-avgpool/up9-sse2.c",
    "src/q8-igemm/4x4c2-sse2.c",
    "src/q8-dwconv/up8x9-sse2.c",
    "src/q8-gavgpool/mp7p7q-sse2.c",
    "src/q8-gavgpool/up7-sse2.c",
    "src/q8-gemm/2x4c8-sse2.c",
    "src/q8-gemm/4x4c2-sse2.c",
    "src/q8-vadd/sse2.c",
    "src/u8-clamp/sse2.c",
    "src/u8-maxpool/9p8x-sse2-c16.c",
    "src/u8-rmax/sse2.c",
    "src/x32-pad/x2-sse2.c",
    "src/x32-zip/x2-sse2.c",
    "src/x32-zip/x3-sse2.c",
    "src/x32-zip/x4-sse2.c",
    "src/x32-zip/xm-sse2.c",
    "src/x8-zip/x2-sse2.c",
    "src/x8-zip/x3-sse2.c",
    "src/x8-zip/x4-sse2.c",
    "src/x8-zip/xm-sse2.c",
    "src/math/exp-sse2-p5.c",
    "src/math/expminus-sse2-p5.c",
"src/math/sigmoid-sse2-p5-div.c",
]

// x86 micro-kernel sources whose file names are tagged "sse41".
// Compiled by xnnpack_sse41_ukernels with -msse4.1.
SSE41_UKERNELS = [
    "src/f32-prelu/gen/sse41-2x4.c",
    "src/f32-prelu/gen/sse41-2x8.c",
    "src/f32-sigmoid/gen/sse41-p5-div-x4.c",
    "src/f32-sigmoid/gen/sse41-p5-div-x8.c",
    "src/f32-sigmoid/gen/sse41-p5-div-x12.c",
    "src/f32-sigmoid/gen/sse41-p5-div-x16.c",
    "src/f32-sigmoid/gen/sse41-p5-div-x20.c",
    "src/f32-sigmoid/gen/sse41-p5-div-x24.c",
]

// x86 micro-kernel sources whose file names are tagged "avx".
// Compiled by xnnpack_avx_ukernels with -mavx.
AVX_UKERNELS = [
    "src/f32-clamp/avx.c",
    "src/f32-dwconv/gen/up16x4-avx-acc2.c",
    "src/f32-dwconv/gen/up16x4-avx.c",
    "src/f32-dwconv/gen/up8x4-avx-acc2.c",
    "src/f32-dwconv/gen/up8x4-avx.c",
    "src/f32-dwconv/gen/up16x9-avx-acc2.c",
    "src/f32-dwconv/gen/up16x9-avx.c",
    "src/f32-dwconv/gen/up8x9-avx-acc2.c",
    "src/f32-dwconv/gen/up8x9-avx.c",
    "src/f32-dwconv/gen/up16x25-avx-acc2.c",
    "src/f32-dwconv/gen/up16x25-avx.c",
    "src/f32-dwconv/gen/up8x25-avx-acc2.c",
    "src/f32-dwconv/gen/up8x25-avx.c",
    "src/f32-gemm/gen/1x8-avx-broadcast.c",
    "src/f32-gemm/gen/4x8-avx-broadcast.c",
    "src/f32-gemm/gen/5x8-avx-broadcast.c",
    "src/f32-gemm/gen/6x8-avx-broadcast.c",
    "src/f32-gemm/gen/7x8-avx-broadcast.c",
    "src/f32-gemm/gen/1x16-avx-broadcast.c",
    "src/f32-gemm/gen/3x16-avx-broadcast.c",
    "src/f32-gemm/gen/4x16-avx-broadcast.c",
    "src/f32-gemm/gen/5x16-avx-broadcast.c",
    "src/f32-gemm/gen-inc/1x8-avx-broadcast.c",
    "src/f32-gemm/gen-inc/4x8-avx-broadcast.c",
    "src/f32-gemm/gen-inc/5x8-avx-broadcast.c",
    "src/f32-gemm/gen-inc/6x8-avx-broadcast.c",
    "src/f32-gemm/gen-inc/7x8-avx-broadcast.c",
    "src/f32-gemm/gen-inc/1x16-avx-broadcast.c",
    "src/f32-gemm/gen-inc/3x16-avx-broadcast.c",
    "src/f32-gemm/gen-inc/4x16-avx-broadcast.c",
    "src/f32-gemm/gen-inc/5x16-avx-broadcast.c",
    "src/f32-hswish/gen/avx-x8.c",
    "src/f32-hswish/gen/avx-x16.c",
    "src/f32-igemm/gen/1x8-avx-broadcast.c",
    "src/f32-igemm/gen/4x8-avx-broadcast.c",
    "src/f32-igemm/gen/5x8-avx-broadcast.c",
    "src/f32-igemm/gen/6x8-avx-broadcast.c",
    "src/f32-igemm/gen/7x8-avx-broadcast.c",
    "src/f32-igemm/gen/1x16-avx-broadcast.c",
    "src/f32-igemm/gen/3x16-avx-broadcast.c",
    "src/f32-igemm/gen/4x16-avx-broadcast.c",
    "src/f32-igemm/gen/5x16-avx-broadcast.c",
    "src/f32-rmax/avx.c",
    "src/f32-vbinary/gen/vadd-avx-x8.c",
    "src/f32-vbinary/gen/vadd-avx-x16.c",
    "src/f32-vbinary/gen/vaddc-avx-x8.c",
    "src/f32-vbinary/gen/vaddc-avx-x16.c",
    "src/f32-vbinary/gen/vdiv-avx-x8.c",
    "src/f32-vbinary/gen/vdiv-avx-x16.c",
    "src/f32-vbinary/gen/vdivc-avx-x8.c",
    "src/f32-vbinary/gen/vdivc-avx-x16.c",
    "src/f32-vbinary/gen/vmax-avx-x8.c",
    "src/f32-vbinary/gen/vmax-avx-x16.c",
    "src/f32-vbinary/gen/vmaxc-avx-x8.c",
    "src/f32-vbinary/gen/vmaxc-avx-x16.c",
    "src/f32-vbinary/gen/vmin-avx-x8.c",
    "src/f32-vbinary/gen/vmin-avx-x16.c",
    "src/f32-vbinary/gen/vminc-avx-x8.c",
    "src/f32-vbinary/gen/vminc-avx-x16.c",
    "src/f32-vbinary/gen/vmul-avx-x8.c",
    "src/f32-vbinary/gen/vmul-avx-x16.c",
    "src/f32-vbinary/gen/vmulc-avx-x8.c",
    "src/f32-vbinary/gen/vmulc-avx-x16.c",
    "src/f32-vbinary/gen/vrdivc-avx-x8.c",
    "src/f32-vbinary/gen/vrdivc-avx-x16.c",
    "src/f32-vbinary/gen/vrsubc-avx-x8.c",
    "src/f32-vbinary/gen/vrsubc-avx-x16.c",
    "src/f32-vbinary/gen/vsub-avx-x8.c",
    "src/f32-vbinary/gen/vsub-avx-x16.c",
    "src/f32-vbinary/gen/vsubc-avx-x8.c",
    "src/f32-vbinary/gen/vsubc-avx-x16.c",
    "src/f32-vscale/avx-unroll32.c",
]

// x86 micro-kernel sources whose file names are tagged "fma3".
// Compiled by xnnpack_fma3_ukernels with -mfma.
FMA3_UKERNELS = [
    "src/f32-dwconv/gen/up16x4-fma3-acc2.c",
    "src/f32-dwconv/gen/up16x4-fma3.c",
    "src/f32-dwconv/gen/up8x4-fma3-acc2.c",
    "src/f32-dwconv/gen/up8x4-fma3.c",
    "src/f32-dwconv/gen/up16x9-fma3-acc2.c",
    "src/f32-dwconv/gen/up16x9-fma3.c",
    "src/f32-dwconv/gen/up8x9-fma3-acc2.c",
    "src/f32-dwconv/gen/up8x9-fma3.c",
    "src/f32-dwconv/gen/up16x25-fma3-acc2.c",
    "src/f32-dwconv/gen/up16x25-fma3.c",
    "src/f32-dwconv/gen/up8x25-fma3-acc2.c",
    "src/f32-dwconv/gen/up8x25-fma3.c",
    "src/f32-gemm/gen/1x8-fma3-broadcast.c",
    "src/f32-gemm/gen/4x8-fma3-broadcast.c",
    "src/f32-gemm/gen/5x8-fma3-broadcast.c",
    "src/f32-gemm/gen/6x8-fma3-broadcast.c",
    "src/f32-gemm/gen/7x8-fma3-broadcast.c",
    "src/f32-gemm/gen/8x8-fma3-broadcast.c",
    "src/f32-gemm/gen/1x16-fma3-broadcast.c",
    "src/f32-gemm/gen/3x16-fma3-broadcast.c",
    "src/f32-gemm/gen/4x16-fma3-broadcast.c",
    "src/f32-gemm/gen/5x16-fma3-broadcast.c",
    "src/f32-gemm/gen/1x16s4-fma3-broadcast.c",
    "src/f32-gemm/gen/3x16s4-fma3-broadcast.c",
    "src/f32-gemm/gen/4x16s4-fma3-broadcast.c",
    "src/f32-gemm/gen/5x16s4-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/1x8-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/4x8-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/5x8-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/6x8-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/7x8-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/8x8-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/1x16-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/3x16-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/4x16-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/5x16-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/1x16s4-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/3x16s4-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/4x16s4-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/5x16s4-fma3-broadcast.c",
    "src/f32-hswish/gen/fma3-x8.c",
    "src/f32-hswish/gen/fma3-x16.c",
    "src/f32-igemm/gen/1x8-fma3-broadcast.c",
    "src/f32-igemm/gen/4x8-fma3-broadcast.c",
    "src/f32-igemm/gen/5x8-fma3-broadcast.c",
    "src/f32-igemm/gen/6x8-fma3-broadcast.c",
    "src/f32-igemm/gen/7x8-fma3-broadcast.c",
    "src/f32-igemm/gen/8x8-fma3-broadcast.c",
    "src/f32-igemm/gen/1x16-fma3-broadcast.c",
    "src/f32-igemm/gen/3x16-fma3-broadcast.c",
    "src/f32-igemm/gen/4x16-fma3-broadcast.c",
    "src/f32-igemm/gen/5x16-fma3-broadcast.c",
    "src/f32-igemm/gen/1x16s4-fma3-broadcast.c",
    "src/f32-igemm/gen/3x16s4-fma3-broadcast.c",
    "src/f32-igemm/gen/4x16s4-fma3-broadcast.c",
    "src/f32-igemm/gen/5x16s4-fma3-broadcast.c",
]

// x86 micro-kernel sources whose file names are tagged "avx2", plus the
// matching src/math helpers.
// Compiled by xnnpack_avx2_ukernels with -mfma and -mavx2.
AVX2_UKERNELS = [
    "src/f32-raddexpminusmax/gen/avx2-p5-x64.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x64-acc2.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x64-acc4.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x72.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x72-acc3.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x80.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x80-acc2.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x80-acc5.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x96.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc2.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc3.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc6.c",
    "src/f32-raddextexp/gen/avx2-p5-x64.c",
    "src/f32-raddextexp/gen/avx2-p5-x64-acc2.c",
    "src/f32-raddextexp/gen/avx2-p5-x64-acc4.c",
    "src/f32-raddextexp/gen/avx2-p5-x72.c",
    "src/f32-raddextexp/gen/avx2-p5-x72-acc3.c",
    "src/f32-raddextexp/gen/avx2-p5-x80.c",
    "src/f32-raddextexp/gen/avx2-p5-x80-acc2.c",
    "src/f32-raddextexp/gen/avx2-p5-x80-acc5.c",
    "src/f32-raddextexp/gen/avx2-p5-x96.c",
    "src/f32-raddextexp/gen/avx2-p5-x96-acc2.c",
    "src/f32-raddextexp/gen/avx2-p5-x96-acc3.c",
    "src/f32-raddextexp/gen/avx2-p5-x96-acc6.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x72.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x72-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80-acc5.c",
"src/f32-raddstoreexpminusmax/gen/avx2-p5-x96.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc6.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x8.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x16.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x24.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x32.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x40.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x48.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x56.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x64.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x72.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x80.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x8.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x16.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x24.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x32.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x40.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x48.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x56.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x64.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x72.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x80.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x8.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x16.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x24.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x32.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x40.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x48.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x56.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x64.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x72.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x80.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x8.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x16.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x24.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x32.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x40.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x48.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x56.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x64.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x72.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x80.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x88.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x96.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x8.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x16.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x24.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x32.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x40.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x48.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x56.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x64.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x72.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x80.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x88.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x96.c",
    "src/math/exp-avx2-p5.c",
    "src/math/exp-avx2-perm-p3.c",
    "src/math/exp-avx2-perm-p4.c",
    "src/math/expminus-avx2-p5.c",
    "src/math/extexp-avx2-p5.c",
    "src/math/sigmoid-avx2-rr2-p5-div.c",
    "src/math/sigmoid-avx2-rr1-p5-div.c",
    "src/math/sigmoid-avx2-rr2-p5-nr2fma.c",
    "src/math/sigmoid-avx2-rr1-p5-nr2fma.c",
    "src/math/sigmoid-avx2-rr2-p5-nr1fma.c",
    "src/math/sigmoid-avx2-rr1-p5-nr1fma.c",
]

// x86 micro-kernel sources whose file names are tagged "avx512f", plus the
// matching src/math helpers.
// Compiled by xnnpack_avx512f_ukernels with -mavx512f.
AVX512F_UKERNELS = [
    "src/f32-clamp/avx512f.c",
    "src/f32-dwconv/gen/up32x4-avx512f-acc2.c",
    "src/f32-dwconv/gen/up32x4-avx512f.c",
    "src/f32-dwconv/gen/up16x4-avx512f-acc2.c",
    "src/f32-dwconv/gen/up16x4-avx512f.c",
    "src/f32-dwconv/gen/up32x9-avx512f-acc2.c",
    "src/f32-dwconv/gen/up32x9-avx512f.c",
    "src/f32-dwconv/gen/up16x9-avx512f-acc2.c",
    "src/f32-dwconv/gen/up16x9-avx512f.c",
    "src/f32-dwconv/gen/up32x25-avx512f-acc2.c",
    "src/f32-dwconv/gen/up32x25-avx512f.c",
    "src/f32-dwconv/gen/up16x25-avx512f-acc2.c",
    "src/f32-dwconv/gen/up16x25-avx512f.c",
    "src/f32-gemm/gen/1x16-avx512f-broadcast.c",
    "src/f32-gemm/gen/4x16-avx512f-broadcast.c",
    "src/f32-gemm/gen/5x16-avx512f-broadcast.c",
    "src/f32-gemm/gen/6x16-avx512f-broadcast.c",
    "src/f32-gemm/gen/7x16-avx512f-broadcast.c",
    "src/f32-gemm/gen/8x16-avx512f-broadcast.c",
    "src/f32-gemm/gen-inc/1x16-avx512f-broadcast.c",
    "src/f32-gemm/gen-inc/4x16-avx512f-broadcast.c",
    "src/f32-gemm/gen-inc/5x16-avx512f-broadcast.c",
    "src/f32-gemm/gen-inc/6x16-avx512f-broadcast.c",
    "src/f32-gemm/gen-inc/7x16-avx512f-broadcast.c",
    "src/f32-gemm/gen-inc/8x16-avx512f-broadcast.c",
    "src/f32-hswish/gen/avx512f-x16.c",
    "src/f32-hswish/gen/avx512f-x32.c",
    "src/f32-igemm/gen/1x16-avx512f-broadcast.c",
    "src/f32-igemm/gen/4x16-avx512f-broadcast.c",
    "src/f32-igemm/gen/5x16-avx512f-broadcast.c",
    "src/f32-igemm/gen/6x16-avx512f-broadcast.c",
    "src/f32-igemm/gen/7x16-avx512f-broadcast.c",
    "src/f32-igemm/gen/8x16-avx512f-broadcast.c",
    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128.c",
    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128-acc2.c",
    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128-acc4.c",
    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x144.c",
    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x144-acc3.c",
    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160.c",
    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160-acc2.c",
    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160-acc5.c",
    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192.c",
    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc2.c",
    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc3.c",
    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc6.c",
    "src/f32-raddextexp/gen/avx512f-p5-scalef-x128.c",
    "src/f32-raddextexp/gen/avx512f-p5-scalef-x128-acc2.c",
    "src/f32-raddextexp/gen/avx512f-p5-scalef-x128-acc4.c",
    "src/f32-raddextexp/gen/avx512f-p5-scalef-x144.c",
    "src/f32-raddextexp/gen/avx512f-p5-scalef-x144-acc3.c",
    "src/f32-raddextexp/gen/avx512f-p5-scalef-x160.c",
    "src/f32-raddextexp/gen/avx512f-p5-scalef-x160-acc2.c",
    "src/f32-raddextexp/gen/avx512f-p5-scalef-x160-acc5.c",
    "src/f32-raddextexp/gen/avx512f-p5-scalef-x192.c",
    "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc2.c",
    "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc3.c",
    "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc6.c",
    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128.c",
    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x144.c",
    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x144-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160.c",
    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160-acc5.c",
    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192.c",
    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc6.c",
    "src/f32-rmax/avx512f.c",
    "src/f32-vbinary/gen/vadd-avx512f-x16.c",
    "src/f32-vbinary/gen/vadd-avx512f-x32.c",
    "src/f32-vbinary/gen/vaddc-avx512f-x16.c",
    "src/f32-vbinary/gen/vaddc-avx512f-x32.c",
    "src/f32-vbinary/gen/vdiv-avx512f-x16.c",
    "src/f32-vbinary/gen/vdiv-avx512f-x32.c",
    "src/f32-vbinary/gen/vdivc-avx512f-x16.c",
    "src/f32-vbinary/gen/vdivc-avx512f-x32.c",
    "src/f32-vbinary/gen/vmax-avx512f-x16.c",
    "src/f32-vbinary/gen/vmax-avx512f-x32.c",
    "src/f32-vbinary/gen/vmaxc-avx512f-x16.c",
    "src/f32-vbinary/gen/vmaxc-avx512f-x32.c",
    "src/f32-vbinary/gen/vmin-avx512f-x16.c",
    "src/f32-vbinary/gen/vmin-avx512f-x32.c",
    "src/f32-vbinary/gen/vminc-avx512f-x16.c",
    "src/f32-vbinary/gen/vminc-avx512f-x32.c",
    "src/f32-vbinary/gen/vmul-avx512f-x16.c",
    "src/f32-vbinary/gen/vmul-avx512f-x32.c",
    "src/f32-vbinary/gen/vmulc-avx512f-x16.c",
    "src/f32-vbinary/gen/vmulc-avx512f-x32.c",
    "src/f32-vbinary/gen/vrdivc-avx512f-x16.c",
    "src/f32-vbinary/gen/vrdivc-avx512f-x32.c",
    "src/f32-vbinary/gen/vrsubc-avx512f-x16.c",
    "src/f32-vbinary/gen/vrsubc-avx512f-x32.c",
    "src/f32-vbinary/gen/vsub-avx512f-x16.c",
    "src/f32-vbinary/gen/vsub-avx512f-x32.c",
    "src/f32-vbinary/gen/vsubc-avx512f-x16.c",
    "src/f32-vbinary/gen/vsubc-avx512f-x32.c",
    "src/f32-vscale/avx512f-unroll64.c",
    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x16.c",
    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x32.c",
    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x48.c",
    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x64.c",
    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x80.c",
    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x96.c",
    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x112.c",
    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x128.c",
    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x144.c",
    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x160.c",
    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x176.c",
    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x192.c",
    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x16.c",
    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x32.c",
    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x48.c",
    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x64.c",
    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x80.c",
    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x96.c",
"src/f32-vscaleextexp/gen/avx512f-p5-scalef-x112.c",
    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x128.c",
    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x144.c",
    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x160.c",
    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x176.c",
    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x192.c",
    "src/math/exp-avx512f-p5-scalef.c",
    "src/math/exp-avx512f-p5.c",
    "src/math/exp-avx512f-perm-p3.c",
    "src/math/exp-avx512f-perm2-p2.c",
    "src/math/extexp-avx512f-p5.c",
]

// Assembly (.S) micro-kernel sources tagged "aarch32".
// Used as the 32-bit arm srcs of xnnpack_asm_ukernels.
AARCH32_ASM_UKERNELS = [
    "src/q8-dwconv/up8x9-aarch32-neon.S",
    "src/f32-gemm/4x8-aarch32-neon-cortex-a53.S",
    "src/f32-gemm/gen/4x8-aarch32-neon-cortex-a75.S",
    "src/f32-gemm/gen/4x8-aarch32-neon-pld-cortex-a75.S",
    "src/f32-gemm/4x8-aarch32-neon-ld64.S",
    "src/f32-igemm/4x8-aarch32-neon-ld64.S",
    "src/f32-igemm/gen/4x8-aarch32-neon-cortex-a75.S",
    "src/f32-igemm/gen/4x8-aarch32-neon-pld-cortex-a75.S",
]

// Assembly (.S) micro-kernel sources tagged "aarch64".
// Used as the arm64 srcs of xnnpack_asm_ukernels.
AARCH64_ASM_UKERNELS = [
    "src/f32-dwconv/up4x9-aarch64-neonfma-cortex-a55.S",
    "src/f32-dwconv/up4x9-aarch64-neonfma.S",
    "src/f32-gemm/gen/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen/4x8-aarch64-neonfma-ld128.S",
    "src/f32-gemm/gen/4x8-aarch64-neonfma-ld64.S",
    "src/f32-gemm/gen/5x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen/6x8-aarch64-neonfma-ld128.S",
    "src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S",
    "src/f32-gemm/gen-inc/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen-inc/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-ld128.S",
    "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-ld64.S",
    "src/f32-gemm/gen-inc/5x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen-inc/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-ld128.S",
    "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-ld64.S",
    "src/f32-igemm/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/gen/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/gen/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/4x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/gen/4x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/gen/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/gen/5x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/gen/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-igemm/gen/6x8-aarch64-neonfma-cortex-a57.S",
"src/f32-igemm/gen/6x8-aarch64-neonfma-cortex-a75.S",
]

// Defaults inherited by the XNNPACK library modules below: include/ and src/
// on the include path, C99, XNN_LOG_LEVEL=2, three warnings disabled, and a
// statically linked libc++.
cc_defaults {
    name: "xnnpack_internal_default",
    vendor_available: true,
    sdk_version: "current",
    local_include_dirs: [
        "include",
        "src",
    ],
    cflags: [
        "-std=c99",
        "-DXNN_LOG_LEVEL=2",
        "-Wno-unused-parameter",
        "-Wno-missing-field-initializers",
        "-Wno-pointer-arith",
    ],
    stl: "libc++_static",
}

// Static library built from the sources listed in TABLE_SRCS.
cc_library_static {
    name: "xnnpack_tables",
    defaults: ["xnnpack_internal_default"],
    srcs: TABLE_SRCS,
}

// Static library containing only src/im2col.c.
cc_library_static {
    name: "xnnpack_im2col",
    defaults: ["xnnpack_internal_default"],
    srcs: [
        "src/im2col.c",
    ],
}

// Static library containing only src/indirection.c; it is pulled into
// xnnpack_operators through whole_static_libs.
cc_library_static {
    name: "xnnpack_indirection",
    defaults: ["xnnpack_internal_default"],
    srcs: [
        "src/indirection.c",
    ],
    header_libs: [
        "fp16_headers",
        "fxdiv_headers",
    ],
    static_libs: [
        "libpthreadpool",
    ],
}

// Static library containing only src/operator-run.c, built with the VLA
// warning disabled (-Wno-vla).
cc_library_static {
    name: "xnnpack_operator_run",
    defaults: ["xnnpack_internal_default"],
    srcs: [
        "src/operator-run.c",
    ],
    cflags: [
        "-Wno-vla",
    ],
    header_libs: [
        "fp16_headers",
        "fxdiv_headers",
    ],
    static_libs: [
        "libclog",
        "libpthreadpool",
    ],
}

// Operator sources from OPERATOR_SRCS plus memory.c and operator-delete.c.
// xnnpack_indirection is folded in via whole_static_libs.
cc_library_static {
    name: "xnnpack_operators",
    defaults: ["xnnpack_internal_default"],
    srcs: OPERATOR_SRCS + [
        "src/memory.c",
        "src/operator-delete.c",
    ],
    header_libs: [
        "fp16_headers",
        "fxdiv_headers",
    ],
    static_libs: [
        "libclog",
        "libpthreadpool",
    ],
    whole_static_libs: [
        "xnnpack_indirection",
    ],
}

// Micro-kernels from SCALAR_UKERNELS; no arch restrictions or extra cflags
// are set on this module.
cc_library_static {
    name: "xnnpack_scalar_ukernels",
    defaults: ["xnnpack_internal_default"],
    srcs: SCALAR_UKERNELS,
    header_libs: [
        "fp16_headers",
        "fxdiv_headers",
    ],
    static_libs: [
"libpthreadpool",
        "xnnpack_tables",
    ],
}

// PSIMD micro-kernels from PSIMD_FASTMATH_UKERNELS, built with -O3 and
// -ffast-math. On 32-bit arm they are additionally built with -marm and
// -mfpu=neon.
cc_library_static {
    name: "xnnpack_psimd_fastmath_ukernels",
    defaults: ["xnnpack_internal_default"],
    srcs: PSIMD_FASTMATH_UKERNELS,
    cflags: [
        "-O3",
        "-ffast-math",
    ],
    arch: {
        arm: {
            cflags: [
                "-marm",
                "-mfpu=neon",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
        "psimd_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}

// PSIMD micro-kernels from PSIMD_ACCMATH_UKERNELS, built with -O3 but without
// -ffast-math. Same 32-bit arm flags as the fastmath variant.
cc_library_static {
    name: "xnnpack_psimd_accmath_ukernels",
    defaults: ["xnnpack_internal_default"],
    srcs: PSIMD_ACCMATH_UKERNELS,
    cflags: [
        "-O3",
    ],
    arch: {
        arm: {
            cflags: [
                "-marm",
                "-mfpu=neon",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
        "psimd_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}

// NEON_UKERNELS for arm and arm64; disabled on x86 and x86_64. The 32-bit arm
// build adds -marm and -mfpu=neon.
cc_library_static {
    name: "xnnpack_neon_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: {
            srcs: NEON_UKERNELS,
            cflags: [
                "-marm",
                "-mfpu=neon",
            ],
        },
        arm64: {
            srcs: NEON_UKERNELS,
        },
        x86: { enabled: false, },
        x86_64: { enabled: false, },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}

// NEONFMA_UKERNELS for arm (with -marm and -mfpu=neon-vfpv4) and arm64; arm64
// additionally builds AARCH64_NEONFMA_UKERNELS. Disabled on x86 and x86_64.
cc_library_static {
    name: "xnnpack_neonfma_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: {
            srcs: NEONFMA_UKERNELS,
            cflags: [
                "-marm",
                "-mfpu=neon-vfpv4",
            ],
        },
        arm64: {
            srcs: NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,
        },
        x86: { enabled: false, },
        x86_64: { enabled: false, },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}

// arm64-only: AARCH64_NEONFP16ARITH_UKERNELS built with
// -march=armv8.2-a+fp16. Disabled on arm, x86 and x86_64.
cc_library_static {
    name: "xnnpack_neonfp16arith_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: {
            srcs: AARCH64_NEONFP16ARITH_UKERNELS,
            cflags: [
                "-march=armv8.2-a+fp16",
            ],
        },
        x86: { enabled: false, },
        x86_64: { enabled: false, },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}

// Assembly micro-kernels: AARCH32_ASM_UKERNELS on arm, AARCH64_ASM_UKERNELS on
// arm64. Disabled on x86 and x86_64. No header or static library dependencies
// are declared here.
cc_library_static {
    name: "xnnpack_asm_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: {
            srcs: AARCH32_ASM_UKERNELS,
        },
        arm64: {
            srcs: AARCH64_ASM_UKERNELS,
        },
        x86: { enabled: false, },
        x86_64: { enabled: false, },
    },
}

// x86/x86_64-only: SSE_UKERNELS and SSE2_UKERNELS are built together under
// -msse2. Disabled on arm and arm64.
cc_library_static {
    name: "xnnpack_sse2_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: SSE_UKERNELS + SSE2_UKERNELS,
            cflags: [
                "-msse2",
            ],
        },
        x86_64: {
            srcs: SSE_UKERNELS + SSE2_UKERNELS,
            cflags: [
                "-msse2",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}

// x86/x86_64-only: SSE41_UKERNELS built with -msse4.1.
cc_library_static {
    name: "xnnpack_sse41_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: SSE41_UKERNELS,
            cflags: [
                "-msse4.1",
            ],
        },
        x86_64: {
            srcs: SSE41_UKERNELS,
            cflags: [
                "-msse4.1",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}

// x86/x86_64-only: AVX_UKERNELS built with -mavx.
cc_library_static {
    name: "xnnpack_avx_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: AVX_UKERNELS,
            cflags: [
                "-mavx",
            ],
        },
        x86_64: {
            srcs: AVX_UKERNELS,
            cflags: [
                "-mavx",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}

// x86/x86_64-only: FMA3_UKERNELS built with -mfma.
cc_library_static {
    name: "xnnpack_fma3_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: FMA3_UKERNELS,
            cflags: [
                "-mfma",
            ],
        },
        x86_64: {
            srcs: FMA3_UKERNELS,
            cflags: [
                "-mfma",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}

// x86/x86_64-only: AVX2_UKERNELS built with both -mfma and -mavx2.
cc_library_static {
    name: "xnnpack_avx2_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: AVX2_UKERNELS,
            cflags: [
                "-mfma",
                "-mavx2",
            ],
        },
        x86_64: {
            srcs: AVX2_UKERNELS,
            cflags: [
                "-mfma",
                "-mavx2",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}

// x86/x86_64-only: AVX512F_UKERNELS built with -mavx512f.
cc_library_static {
    name: "xnnpack_avx512f_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: AVX512F_UKERNELS,
            cflags: [
                "-mavx512f",
            ],
        },
        x86_64: {
            srcs: AVX512F_UKERNELS,
            cflags: [
                "-mavx512f",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}

// Aggregate of all micro-kernel libraries. Each arch lists the per-ISA
// libraries it links as whole_static_libs; the scalar kernels and tables are
// added for every arch by the top-level whole_static_libs.
cc_library_static {
    name: "xnnpack_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: {
            whole_static_libs: [
"xnnpack_psimd_fastmath_ukernels", 1725 "xnnpack_psimd_accmath_ukernels", 1726 "xnnpack_neon_ukernels", 1727 "xnnpack_neonfma_ukernels", 1728 "xnnpack_asm_ukernels", 1729 ], 1730 }, 1731 arm64: { 1732 whole_static_libs: [ 1733 "xnnpack_psimd_fastmath_ukernels", 1734 "xnnpack_psimd_accmath_ukernels", 1735 "xnnpack_neon_ukernels", 1736 "xnnpack_neonfma_ukernels", 1737 "xnnpack_neonfp16arith_ukernels", 1738 "xnnpack_asm_ukernels", 1739 ], 1740 }, 1741 x86: { 1742 whole_static_libs: [ 1743 "xnnpack_psimd_fastmath_ukernels", 1744 "xnnpack_psimd_accmath_ukernels", 1745 "xnnpack_sse2_ukernels", 1746 "xnnpack_sse41_ukernels", 1747 "xnnpack_avx_ukernels", 1748 "xnnpack_fma3_ukernels", 1749 "xnnpack_avx2_ukernels", 1750 "xnnpack_avx512f_ukernels", 1751 ], 1752 }, 1753 x86_64: { 1754 whole_static_libs: [ 1755 "xnnpack_psimd_fastmath_ukernels", 1756 "xnnpack_psimd_accmath_ukernels", 1757 "xnnpack_sse2_ukernels", 1758 "xnnpack_sse41_ukernels", 1759 "xnnpack_avx_ukernels", 1760 "xnnpack_fma3_ukernels", 1761 "xnnpack_avx2_ukernels", 1762 "xnnpack_avx512f_ukernels", 1763 ], 1764 }, 1765 }, 1766 whole_static_libs: [ 1767 "xnnpack_scalar_ukernels", 1768 "xnnpack_tables", 1769 ], 1770} 1771 1772cc_library_static { 1773 name: "libXNNPACK", 1774 defaults: ["xnnpack_internal_default"], 1775 export_include_dirs: ["include"], 1776 srcs: [ 1777 "src/init.c", 1778 "src/runtime.c", 1779 "src/subgraph.c", 1780 "src/tensor.c", 1781 ], 1782 whole_static_libs: [ 1783 "libclog", 1784 "libcpuinfo", 1785 "libpthreadpool", 1786 "xnnpack_ukernels", 1787 "xnnpack_operator_run", 1788 "xnnpack_operators", 1789 ], 1790} 1791 1792// Tests and benchmarks 1793cc_defaults { 1794 name: "xnnpack_tests_default", 1795 vendor_available: true, 1796 stl: "libc++_static", 1797 local_include_dirs: [ 1798 "bench", 1799 "models", 1800 "test", 1801 "src", 1802 ], 1803 cflags: [ 1804 "-Wno-unused-function" 1805 ], 1806 header_libs: [ 1807 "fp16_headers", 1808 ], 1809 static_libs: [ 1810 "libXNNPACK", 1811 
"libpthreadpool", 1812 "libgmock", 1813 ], 1814 shared_libs: [ 1815 "liblog", 1816 ], 1817} 1818 1819cc_library_static { 1820 name: "xnnpack_mobilenet_v1", 1821 defaults: ["xnnpack_tests_default"], 1822 srcs: [ 1823 "models/mobilenet-v1.cc", 1824 ], 1825} 1826 1827cc_library_static { 1828 name: "xnnpack_mobilenet_v2", 1829 defaults: ["xnnpack_tests_default"], 1830 srcs: [ 1831 "models/mobilenet-v2.cc", 1832 ], 1833} 1834 1835cc_library_static { 1836 name: "xnnpack_mobilenet_v3_large", 1837 defaults: ["xnnpack_tests_default"], 1838 srcs: [ 1839 "models/mobilenet-v3-large.cc", 1840 ], 1841} 1842 1843cc_library_static { 1844 name: "xnnpack_mobilenet_v3_small", 1845 defaults: ["xnnpack_tests_default"], 1846 srcs: [ 1847 "models/mobilenet-v3-small.cc", 1848 ], 1849} 1850 1851cc_benchmark { 1852 name: "xnnpack_end2end_bench", 1853 defaults: ["xnnpack_tests_default"], 1854 srcs: [ 1855 "bench/end2end.cc", 1856 "bench/utils.cc", 1857 ], 1858 cflags: [ 1859 "-Wno-unused-result" 1860 ], 1861 static_libs: [ 1862 "libcpuinfo", 1863 "libgoogle-benchmark", 1864 "xnnpack_mobilenet_v1", 1865 "xnnpack_mobilenet_v2", 1866 "xnnpack_mobilenet_v3_large", 1867 "xnnpack_mobilenet_v3_small", 1868 ], 1869} 1870 1871cc_test { 1872 name: "xnnpack_add_nc_test", 1873 defaults: ["xnnpack_tests_default"], 1874 srcs: [ 1875 "test/add-nc.cc", 1876 ], 1877 test_suites: [ 1878 "general-tests", 1879 ], 1880} 1881 1882cc_test { 1883 name: "xnnpack_add_nd_test", 1884 defaults: ["xnnpack_tests_default"], 1885 srcs: [ 1886 "test/add-nd.cc", 1887 ], 1888 test_suites: [ 1889 "general-tests", 1890 ], 1891} 1892 1893cc_test { 1894 name: "xnnpack_argmax_pooling_nhwc_test", 1895 defaults: ["xnnpack_tests_default"], 1896 srcs: [ 1897 "test/argmax-pooling-nhwc.cc", 1898 ], 1899 test_suites: [ 1900 "general-tests", 1901 ], 1902} 1903 1904cc_test { 1905 name: "xnnpack_average_pooling_nhwc_test", 1906 defaults: ["xnnpack_tests_default"], 1907 srcs: [ 1908 "test/average-pooling-nhwc.cc", 1909 ], 1910 test_suites: [ 
1911 "general-tests", 1912 ], 1913} 1914 1915cc_test { 1916 name: "xnnpack_channel_pad_nc_test", 1917 defaults: ["xnnpack_tests_default"], 1918 srcs: [ 1919 "test/channel-pad-nc.cc", 1920 ], 1921 test_suites: [ 1922 "general-tests", 1923 ], 1924} 1925 1926cc_test { 1927 name: "xnnpack_channel_shuffle_nc_test", 1928 defaults: ["xnnpack_tests_default"], 1929 srcs: [ 1930 "test/channel-shuffle-nc.cc", 1931 ], 1932 test_suites: [ 1933 "general-tests", 1934 ], 1935} 1936 1937cc_test { 1938 name: "xnnpack_clamp_nc_test", 1939 defaults: ["xnnpack_tests_default"], 1940 srcs: [ 1941 "test/clamp-nc.cc", 1942 ], 1943 test_suites: [ 1944 "general-tests", 1945 ], 1946} 1947 1948cc_test { 1949 name: "xnnpack_convolution_nhwc_test", 1950 defaults: ["xnnpack_tests_default"], 1951 srcs: [ 1952 "test/convolution-nhwc.cc", 1953 ], 1954 test_suites: [ 1955 "general-tests", 1956 ], 1957} 1958 1959cc_test { 1960 name: "xnnpack_convolution_nchw_test", 1961 defaults: ["xnnpack_tests_default"], 1962 srcs: [ 1963 "test/convolution-nchw.cc", 1964 ], 1965 test_suites: [ 1966 "general-tests", 1967 ], 1968} 1969 1970cc_test { 1971 name: "xnnpack_deconvolution_nhwc_test", 1972 defaults: ["xnnpack_tests_default"], 1973 srcs: [ 1974 "test/deconvolution-nhwc.cc", 1975 ], 1976 test_suites: [ 1977 "general-tests", 1978 ], 1979} 1980 1981cc_test { 1982 name: "xnnpack_divide_nd_test", 1983 defaults: ["xnnpack_tests_default"], 1984 srcs: [ 1985 "test/divide-nd.cc", 1986 ], 1987 test_suites: [ 1988 "general-tests", 1989 ], 1990} 1991 1992cc_test { 1993 name: "xnnpack_fully_connected_nc_test", 1994 defaults: ["xnnpack_tests_default"], 1995 srcs: [ 1996 "test/fully-connected-nc.cc", 1997 ], 1998 test_suites: [ 1999 "general-tests", 2000 ], 2001} 2002 2003cc_test { 2004 name: "xnnpack_global_average_pooling_nwc_test", 2005 defaults: ["xnnpack_tests_default"], 2006 srcs: [ 2007 "test/global-average-pooling-nwc.cc", 2008 ], 2009 test_suites: [ 2010 "general-tests", 2011 ], 2012} 2013 2014cc_test { 2015 name: 
"xnnpack_global_average_pooling_ncw_test", 2016 defaults: ["xnnpack_tests_default"], 2017 srcs: [ 2018 "test/global-average-pooling-ncw.cc", 2019 ], 2020 test_suites: [ 2021 "general-tests", 2022 ], 2023} 2024 2025cc_test { 2026 name: "xnnpack_hardswish_nc_test", 2027 defaults: ["xnnpack_tests_default"], 2028 srcs: [ 2029 "test/hardswish-nc.cc", 2030 ], 2031 test_suites: [ 2032 "general-tests", 2033 ], 2034} 2035 2036cc_test { 2037 name: "xnnpack_leaky_relu_nc_test", 2038 defaults: ["xnnpack_tests_default"], 2039 srcs: [ 2040 "test/leaky-relu-nc.cc", 2041 ], 2042 test_suites: [ 2043 "general-tests", 2044 ], 2045} 2046 2047cc_test { 2048 name: "xnnpack_max_pooling_nhwc_test", 2049 defaults: ["xnnpack_tests_default"], 2050 srcs: [ 2051 "test/max-pooling-nhwc.cc", 2052 ], 2053 test_suites: [ 2054 "general-tests", 2055 ], 2056} 2057 2058cc_test { 2059 name: "xnnpack_maximum_nd_test", 2060 defaults: ["xnnpack_tests_default"], 2061 srcs: [ 2062 "test/maximum-nd.cc", 2063 ], 2064 test_suites: [ 2065 "general-tests", 2066 ], 2067} 2068 2069cc_test { 2070 name: "xnnpack_minimum_nd_test", 2071 defaults: ["xnnpack_tests_default"], 2072 srcs: [ 2073 "test/minimum-nd.cc", 2074 ], 2075 test_suites: [ 2076 "general-tests", 2077 ], 2078} 2079 2080cc_test { 2081 name: "xnnpack_multiply_nd_test", 2082 defaults: ["xnnpack_tests_default"], 2083 srcs: [ 2084 "test/multiply-nd.cc", 2085 ], 2086 test_suites: [ 2087 "general-tests", 2088 ], 2089} 2090 2091cc_test { 2092 name: "xnnpack_prelu_nc_test", 2093 defaults: ["xnnpack_tests_default"], 2094 srcs: [ 2095 "test/prelu-nc.cc", 2096 ], 2097 test_suites: [ 2098 "general-tests", 2099 ], 2100} 2101 2102cc_test { 2103 name: "xnnpack_resize_bilinear_nhwc_test", 2104 defaults: ["xnnpack_tests_default"], 2105 srcs: [ 2106 "test/resize-bilinear-nhwc.cc", 2107 ], 2108 test_suites: [ 2109 "general-tests", 2110 ], 2111} 2112 2113cc_test { 2114 name: "xnnpack_sigmoid_nc_test", 2115 defaults: ["xnnpack_tests_default"], 2116 srcs: [ 2117 
"test/sigmoid-nc.cc", 2118 ], 2119 test_suites: [ 2120 "general-tests", 2121 ], 2122} 2123 2124cc_test { 2125 name: "xnnpack_softmax_nc_test", 2126 defaults: ["xnnpack_tests_default"], 2127 srcs: [ 2128 "test/softmax-nc.cc", 2129 ], 2130 test_suites: [ 2131 "general-tests", 2132 ], 2133} 2134 2135cc_test { 2136 name: "xnnpack_subtract_nd_test", 2137 defaults: ["xnnpack_tests_default"], 2138 srcs: [ 2139 "test/subtract-nd.cc", 2140 ], 2141 test_suites: [ 2142 "general-tests", 2143 ], 2144} 2145 2146cc_test { 2147 name: "xnnpack_unpooling_nhwc_test", 2148 defaults: ["xnnpack_tests_default"], 2149 srcs: [ 2150 "test/unpooling-nhwc.cc", 2151 ], 2152 test_suites: [ 2153 "general-tests", 2154 ], 2155} 2156