• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright (C) 2020 The Android Open Source Project
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
// OPERATOR_SRCS: paths of the operator C sources that live directly under
// src/ (one file per entry). Entries are kept in alphabetical order.
// NOTE(review): presumably referenced from the `srcs` of a build rule further
// down the file, outside this view - confirm before renaming.
15OPERATOR_SRCS = [
16    "src/add-nc.c",
17    "src/argmax-pooling-nhwc.c",
18    "src/average-pooling-nhwc.c",
19    "src/binary-elementwise-nd.c",
20    "src/channel-pad-nc.c",
21    "src/channel-shuffle-nc.c",
22    "src/clamp-nc.c",
23    "src/convolution-nchw.c",
24    "src/convolution-nhwc.c",
25    "src/deconvolution-nhwc.c",
26    "src/fully-connected-nc.c",
27    "src/global-average-pooling-ncw.c",
28    "src/global-average-pooling-nwc.c",
29    "src/hardswish-nc.c",
30    "src/leaky-relu-nc.c",
31    "src/max-pooling-nhwc.c",
32    "src/prelu-nc.c",
33    "src/resize-bilinear-nhwc.c",
34    "src/sigmoid-nc.c",
35    "src/softmax-nc.c",
36    "src/unpooling-nhwc.c",
37]
38
// TABLE_SRCS: C sources under src/tables/.
// NOTE(review): the file names suggest precomputed exp2(k/64) and
// exp2(k/2048) lookup tables - confirm against the sources themselves.
39TABLE_SRCS = [
40    "src/tables/exp2-k-over-64.c",
41    "src/tables/exp2-k-over-2048.c",
42]
43
// SCALAR_UKERNELS: micro-kernel C sources whose file names all carry the
// `scalar` tag. Entries are grouped by directory: the f32-* kernel families
// first, then src/math/, then the q8-*, u8-*, x32-* and x8-* families.
// NOTE(review): presumably the portable, ISA-independent implementations -
// confirm against the build rule that consumes this list.
44SCALAR_UKERNELS = [
45    "src/f32-argmaxpool/4x-scalar-c1.c",
46    "src/f32-argmaxpool/9p8x-scalar-c1.c",
47    "src/f32-argmaxpool/9x-scalar-c1.c",
48    "src/f32-avgpool/mp9p8q-scalar.c",
49    "src/f32-avgpool/up9-scalar.c",
50    "src/f32-bilinear/gen/scalar-c1.c",
51    "src/f32-bilinear/gen/scalar-c2.c",
52    "src/f32-bilinear/gen/scalar-c4.c",
53    "src/f32-clamp/scalar.c",
54    "src/f32-conv-hwc/3x3s2p0p1c3x4-scalar-1x1.c",
55    "src/f32-conv-hwc/3x3s2p1c3x4-scalar-1x1.c",
56    "src/f32-conv-hwc2spchw/3x3s2p1c3x4-scalar-1x1.c",
57    "src/f32-dwconv-spchw/3x3p1-scalar.c",
58    "src/f32-dwconv-spchw/3x3s2p1-scalar.c",
59    "src/f32-dwconv-spchw/5x5p2-scalar.c",
60    "src/f32-dwconv-spchw/5x5s2p2-scalar.c",
61    "src/f32-dwconv/gen/up1x25-scalar-acc2.c",
62    "src/f32-dwconv/gen/up1x25-scalar.c",
63    "src/f32-dwconv/gen/up1x4-scalar-acc2.c",
64    "src/f32-dwconv/gen/up1x4-scalar.c",
65    "src/f32-dwconv/gen/up1x9-scalar-acc2.c",
66    "src/f32-dwconv/gen/up1x9-scalar.c",
67    "src/f32-dwconv/gen/up2x25-scalar-acc2.c",
68    "src/f32-dwconv/gen/up2x25-scalar.c",
69    "src/f32-dwconv/gen/up2x4-scalar-acc2.c",
70    "src/f32-dwconv/gen/up2x4-scalar.c",
71    "src/f32-dwconv/gen/up2x9-scalar-acc2.c",
72    "src/f32-dwconv/gen/up2x9-scalar.c",
73    "src/f32-gavgpool-spchw/scalar-x1.c",
74    "src/f32-gavgpool/mp7p7q-scalar.c",
75    "src/f32-gavgpool/up7-scalar.c",
76    "src/f32-gemm/gen-inc/1x4-scalar.c",
77    "src/f32-gemm/gen-inc/2x4-scalar.c",
78    "src/f32-gemm/gen-inc/4x4-scalar.c",
79    "src/f32-gemm/gen/1x4-scalar.c",
80    "src/f32-gemm/gen/2x4-scalar.c",
81    "src/f32-gemm/gen/4x2-scalar.c",
82    "src/f32-gemm/gen/4x4-scalar.c",
83    "src/f32-hswish/gen/scalar-x1.c",
84    "src/f32-hswish/gen/scalar-x2.c",
85    "src/f32-hswish/gen/scalar-x4.c",
86    "src/f32-igemm/gen/1x4-scalar.c",
87    "src/f32-igemm/gen/2x4-scalar.c",
88    "src/f32-igemm/gen/4x2-scalar.c",
89    "src/f32-igemm/gen/4x4-scalar.c",
90    "src/f32-maxpool/9p8x-scalar-c1.c",
91    "src/f32-pavgpool/mp9p8q-scalar.c",
92    "src/f32-pavgpool/up9-scalar.c",
93    "src/f32-ppmm/gen/2x4-scalar.c",
94    "src/f32-ppmm/gen/3x3-scalar.c",
95    "src/f32-ppmm/gen/4x2-scalar.c",
96    "src/f32-ppmm/gen/4x4-scalar.c",
97    "src/f32-prelu/gen/scalar-2x1.c",
98    "src/f32-prelu/gen/scalar-2x4.c",
99    "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x1.c",
100    "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x2.c",
101    "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x2-acc2.c",
102    "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x4.c",
103    "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x4-acc2.c",
104    "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x4-acc4.c",
105    "src/f32-raddstoreexpminusmax/gen/scalar-p5-x1.c",
106    "src/f32-raddstoreexpminusmax/gen/scalar-p5-x2.c",
107    "src/f32-raddstoreexpminusmax/gen/scalar-p5-x2-acc2.c",
108    "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4.c",
109    "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4-acc2.c",
110    "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4-acc4.c",
111    "src/f32-rmax/scalar.c",
112    "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x1.c",
113    "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x2.c",
114    "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x4.c",
115    "src/f32-sigmoid/gen/scalar-lut64-p2-div-x1.c",
116    "src/f32-sigmoid/gen/scalar-lut64-p2-div-x2.c",
117    "src/f32-sigmoid/gen/scalar-lut64-p2-div-x4.c",
118    "src/f32-sigmoid/gen/scalar-p5-div-x1.c",
119    "src/f32-sigmoid/gen/scalar-p5-div-x2.c",
120    "src/f32-sigmoid/gen/scalar-p5-div-x4.c",
121    "src/f32-spmm/gen/1x1-scalar-pipelined.c",
122    "src/f32-spmm/gen/1x1-scalar.c",
123    "src/f32-spmm/gen/2x1-scalar-pipelined.c",
124    "src/f32-spmm/gen/2x1-scalar.c",
125    "src/f32-spmm/gen/4x1-scalar-pipelined.c",
126    "src/f32-spmm/gen/4x1-scalar.c",
127    "src/f32-spmm/gen/8x1-scalar-pipelined.c",
128    "src/f32-spmm/gen/8x1-scalar.c",
129    "src/f32-spmm/gen/8x2-scalar.c",
130    "src/f32-spmm/gen/8x4-scalar.c",
131    "src/f32-vbinary/gen/vadd-scalar-x1.c",
132    "src/f32-vbinary/gen/vadd-scalar-x2.c",
133    "src/f32-vbinary/gen/vadd-scalar-x4.c",
134    "src/f32-vbinary/gen/vaddc-scalar-x1.c",
135    "src/f32-vbinary/gen/vaddc-scalar-x2.c",
136    "src/f32-vbinary/gen/vaddc-scalar-x4.c",
137    "src/f32-vbinary/gen/vdiv-scalar-x1.c",
138    "src/f32-vbinary/gen/vdiv-scalar-x2.c",
139    "src/f32-vbinary/gen/vdiv-scalar-x4.c",
140    "src/f32-vbinary/gen/vdivc-scalar-x1.c",
141    "src/f32-vbinary/gen/vdivc-scalar-x2.c",
142    "src/f32-vbinary/gen/vdivc-scalar-x4.c",
143    "src/f32-vbinary/gen/vmax-scalar-x1.c",
144    "src/f32-vbinary/gen/vmax-scalar-x2.c",
145    "src/f32-vbinary/gen/vmax-scalar-x4.c",
146    "src/f32-vbinary/gen/vmaxc-scalar-x1.c",
147    "src/f32-vbinary/gen/vmaxc-scalar-x2.c",
148    "src/f32-vbinary/gen/vmaxc-scalar-x4.c",
149    "src/f32-vbinary/gen/vmin-scalar-x1.c",
150    "src/f32-vbinary/gen/vmin-scalar-x2.c",
151    "src/f32-vbinary/gen/vmin-scalar-x4.c",
152    "src/f32-vbinary/gen/vminc-scalar-x1.c",
153    "src/f32-vbinary/gen/vminc-scalar-x2.c",
154    "src/f32-vbinary/gen/vminc-scalar-x4.c",
155    "src/f32-vbinary/gen/vmul-scalar-x1.c",
156    "src/f32-vbinary/gen/vmul-scalar-x2.c",
157    "src/f32-vbinary/gen/vmul-scalar-x4.c",
158    "src/f32-vbinary/gen/vmulc-scalar-x1.c",
159    "src/f32-vbinary/gen/vmulc-scalar-x2.c",
160    "src/f32-vbinary/gen/vmulc-scalar-x4.c",
161    "src/f32-vbinary/gen/vrdivc-scalar-x1.c",
162    "src/f32-vbinary/gen/vrdivc-scalar-x2.c",
163    "src/f32-vbinary/gen/vrdivc-scalar-x4.c",
164    "src/f32-vbinary/gen/vrsubc-scalar-x1.c",
165    "src/f32-vbinary/gen/vrsubc-scalar-x2.c",
166    "src/f32-vbinary/gen/vrsubc-scalar-x4.c",
167    "src/f32-vbinary/gen/vsub-scalar-x1.c",
168    "src/f32-vbinary/gen/vsub-scalar-x2.c",
169    "src/f32-vbinary/gen/vsub-scalar-x4.c",
170    "src/f32-vbinary/gen/vsubc-scalar-x1.c",
171    "src/f32-vbinary/gen/vsubc-scalar-x2.c",
172    "src/f32-vbinary/gen/vsubc-scalar-x4.c",
173    "src/f32-vmulcaddc/gen/c1-scalar-2x.c",
174    "src/f32-vmulcaddc/gen/c2-scalar-2x.c",
175    "src/f32-vmulcaddc/gen/c4-scalar-2x.c",
176    "src/math/expminus-scalar-lut2048-p1.c",
177    "src/math/expminus-scalar-lut64-p2.c",
178    "src/math/expminus-scalar-p5.c",
179    "src/math/sigmoid-scalar-lut2048-p1-div.c",
180    "src/math/sigmoid-scalar-lut64-p2-div.c",
181    "src/math/sigmoid-scalar-p5-div.c",
182    "src/q8-avgpool/mp9p8q-scalar.c",
183    "src/q8-avgpool/up9-scalar.c",
184    "src/q8-dwconv/up1x9-scalar.c",
185    "src/q8-gavgpool/mp7p7q-scalar.c",
186    "src/q8-gavgpool/up7-scalar.c",
187    "src/q8-gemm/2x2-scalar.c",
188    "src/q8-igemm/2x2-scalar.c",
189    "src/q8-vadd/scalar.c",
190    "src/u8-clamp/scalar.c",
191    "src/u8-lut32norm/scalar.c",
192    "src/u8-maxpool/9p8x-scalar-c1.c",
193    "src/u8-rmax/scalar.c",
194    "src/x32-packx/x2-scalar.c",
195    "src/x32-packx/x3-scalar.c",
196    "src/x32-packx/x4-scalar.c",
197    "src/x32-pad/x2-scalar.c",
198    "src/x32-unpool/scalar.c",
199    "src/x32-zip/x2-scalar.c",
200    "src/x32-zip/x3-scalar.c",
201    "src/x32-zip/x4-scalar.c",
202    "src/x32-zip/xm-scalar.c",
203    "src/x8-lut/scalar.c",
204    "src/x8-zip/x2-scalar.c",
205    "src/x8-zip/x3-scalar.c",
206    "src/x8-zip/x4-scalar.c",
207    "src/x8-zip/xm-scalar.c",
208]
209
// PSIMD_FASTMATH_UKERNELS: micro-kernel C sources whose file names all carry
// the `psimd` tag, covering the f32-* and x32-* kernel families.
// NOTE(review): the FASTMATH suffix presumably means these files are built
// with relaxed floating-point flags, in contrast to PSIMD_ACCMATH_UKERNELS -
// confirm against the build rules that consume the two lists.
210PSIMD_FASTMATH_UKERNELS = [
211    "src/f32-argmaxpool/4x-psimd-c4.c",
212    "src/f32-argmaxpool/9p8x-psimd-c4.c",
213    "src/f32-argmaxpool/9x-psimd-c4.c",
214    "src/f32-avgpool/mp9p8q-psimd.c",
215    "src/f32-avgpool/up9-psimd.c",
216    "src/f32-bilinear/gen/psimd-c4.c",
217    "src/f32-bilinear/gen/psimd-c8.c",
218    "src/f32-clamp/psimd.c",
219    "src/f32-dwconv/gen/up4x25-psimd-acc2.c",
220    "src/f32-dwconv/gen/up4x25-psimd.c",
221    "src/f32-dwconv/gen/up4x4-psimd-acc2.c",
222    "src/f32-dwconv/gen/up4x4-psimd.c",
223    "src/f32-dwconv/gen/up4x9-psimd-acc2.c",
224    "src/f32-dwconv/gen/up4x9-psimd.c",
225    "src/f32-dwconv/gen/up8x25-psimd-acc2.c",
226    "src/f32-dwconv/gen/up8x25-psimd.c",
227    "src/f32-dwconv/gen/up8x4-psimd-acc2.c",
228    "src/f32-dwconv/gen/up8x4-psimd.c",
229    "src/f32-dwconv/gen/up8x9-psimd-acc2.c",
230    "src/f32-dwconv/gen/up8x9-psimd.c",
231    "src/f32-gavgpool/mp7p7q-psimd.c",
232    "src/f32-gavgpool/up7-psimd.c",
233    "src/f32-gemm/gen/1x8-psimd-loadsplat.c",
234    "src/f32-gemm/gen/1x8-psimd-splat.c",
235    "src/f32-gemm/gen/1x8s4-psimd.c",
236    "src/f32-gemm/gen/4x8-psimd-loadsplat.c",
237    "src/f32-gemm/gen/4x8-psimd-splat.c",
238    "src/f32-gemm/gen/4x8s4-psimd.c",
239    "src/f32-gemm/gen/6x8-psimd-loadsplat.c",
240    "src/f32-gemm/gen/6x8-psimd-splat.c",
241    "src/f32-gemm/gen/6x8s4-psimd.c",
242    "src/f32-gemm/gen-inc/1x8-psimd-loadsplat.c",
243    "src/f32-gemm/gen-inc/1x8-psimd-splat.c",
244    "src/f32-gemm/gen-inc/1x8s4-psimd.c",
245    "src/f32-gemm/gen-inc/4x8-psimd-loadsplat.c",
246    "src/f32-gemm/gen-inc/4x8-psimd-splat.c",
247    "src/f32-gemm/gen-inc/4x8s4-psimd.c",
248    "src/f32-gemm/gen-inc/6x8-psimd-loadsplat.c",
249    "src/f32-gemm/gen-inc/6x8-psimd-splat.c",
250    "src/f32-gemm/gen-inc/6x8s4-psimd.c",
251    "src/f32-hswish/gen/psimd-x4.c",
252    "src/f32-hswish/gen/psimd-x8.c",
253    "src/f32-igemm/gen/1x8-psimd-loadsplat.c",
254    "src/f32-igemm/gen/1x8-psimd-splat.c",
255    "src/f32-igemm/gen/1x8s4-psimd.c",
256    "src/f32-igemm/gen/4x2c4-psimd.c",
257    "src/f32-igemm/gen/4x8-psimd-loadsplat.c",
258    "src/f32-igemm/gen/4x8-psimd-splat.c",
259    "src/f32-igemm/gen/4x8s4-psimd.c",
260    "src/f32-igemm/gen/6x8-psimd-loadsplat.c",
261    "src/f32-igemm/gen/6x8-psimd-splat.c",
262    "src/f32-igemm/gen/6x8s4-psimd.c",
263    "src/f32-maxpool/9p8x-psimd-c4.c",
264    "src/f32-pavgpool/mp9p8q-psimd.c",
265    "src/f32-pavgpool/up9-psimd.c",
266    "src/f32-ppmm/gen/4x8-psimd.c",
267    "src/f32-prelu/gen/psimd-2x4.c",
268    "src/f32-prelu/gen/psimd-2x8.c",
269    "src/f32-rmax/psimd.c",
270    "src/f32-vbinary/gen/vadd-psimd-x4.c",
271    "src/f32-vbinary/gen/vadd-psimd-x8.c",
272    "src/f32-vbinary/gen/vaddc-psimd-x4.c",
273    "src/f32-vbinary/gen/vaddc-psimd-x8.c",
274    "src/f32-vbinary/gen/vdiv-psimd-x4.c",
275    "src/f32-vbinary/gen/vdiv-psimd-x8.c",
276    "src/f32-vbinary/gen/vdivc-psimd-x4.c",
277    "src/f32-vbinary/gen/vdivc-psimd-x8.c",
278    "src/f32-vbinary/gen/vmax-psimd-x4.c",
279    "src/f32-vbinary/gen/vmax-psimd-x8.c",
280    "src/f32-vbinary/gen/vmaxc-psimd-x4.c",
281    "src/f32-vbinary/gen/vmaxc-psimd-x8.c",
282    "src/f32-vbinary/gen/vmin-psimd-x4.c",
283    "src/f32-vbinary/gen/vmin-psimd-x8.c",
284    "src/f32-vbinary/gen/vminc-psimd-x4.c",
285    "src/f32-vbinary/gen/vminc-psimd-x8.c",
286    "src/f32-vbinary/gen/vmul-psimd-x4.c",
287    "src/f32-vbinary/gen/vmul-psimd-x8.c",
288    "src/f32-vbinary/gen/vmulc-psimd-x4.c",
289    "src/f32-vbinary/gen/vmulc-psimd-x8.c",
290    "src/f32-vbinary/gen/vrdivc-psimd-x4.c",
291    "src/f32-vbinary/gen/vrdivc-psimd-x8.c",
292    "src/f32-vbinary/gen/vrsubc-psimd-x4.c",
293    "src/f32-vbinary/gen/vrsubc-psimd-x8.c",
294    "src/f32-vbinary/gen/vsub-psimd-x4.c",
295    "src/f32-vbinary/gen/vsub-psimd-x8.c",
296    "src/f32-vbinary/gen/vsubc-psimd-x4.c",
297    "src/f32-vbinary/gen/vsubc-psimd-x8.c",
298    "src/f32-vmulcaddc/gen/c4-psimd-2x.c",
299    "src/f32-vmulcaddc/gen/c8-psimd-2x.c",
300    "src/x32-packx/x4-psimd.c",
301    "src/x32-pad/x2-psimd.c",
302    "src/x32-unpool/psimd.c",
303    "src/x32-zip/x2-psimd.c",
304    "src/x32-zip/x3-psimd.c",
305    "src/x32-zip/x4-psimd.c",
306    "src/x32-zip/xm-psimd.c",
307]
308
// PSIMD_ACCMATH_UKERNELS: the `psimd` sources for f32-raddstoreexpminusmax
// and f32-sigmoid, plus src/math/sigmoid-psimd-p5-div.c.
// NOTE(review): the ACCMATH suffix presumably means these files must be built
// without fast-math style flags because their results depend on exact
// floating-point evaluation - confirm against the consuming build rule.
309PSIMD_ACCMATH_UKERNELS = [
310    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x4.c",
311    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x8.c",
312    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x8-acc2.c",
313    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x12.c",
314    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x12-acc2.c",
315    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x12-acc3.c",
316    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x16.c",
317    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x16-acc2.c",
318    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x16-acc4.c",
319    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x20.c",
320    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x20-acc2.c",
321    "src/f32-raddstoreexpminusmax/gen/psimd-p5-x20-acc5.c",
322    "src/f32-sigmoid/gen/psimd-p5-div-x4.c",
323    "src/f32-sigmoid/gen/psimd-p5-div-x8.c",
324    "src/f32-sigmoid/gen/psimd-p5-div-x12.c",
325    "src/f32-sigmoid/gen/psimd-p5-div-x16.c",
326    "src/f32-sigmoid/gen/psimd-p5-div-x20.c",
327    "src/f32-sigmoid/gen/psimd-p5-div-x24.c",
328    "src/math/sigmoid-psimd-p5-div.c",
329]
330
331// ISA-specific micro-kernels
// NEON_UKERNELS: micro-kernel C sources whose file names carry the `neon`
// tag (none of them the `neonfma` tag). Covers the f32-*, q8-*, u8-*, x32-*
// and x8-* kernel families, followed by the src/math/ sigmoid variants.
// NOTE(review): presumably compiled only for ARM targets with NEON enabled -
// confirm against the arch-specific section of the consuming build rule.
332NEON_UKERNELS = [
333    "src/f32-avgpool/mp9p8q-neon.c",
334    "src/f32-avgpool/up9-neon.c",
335    "src/f32-bilinear/gen/neon-c4.c",
336    "src/f32-bilinear/gen/neon-c8.c",
337    "src/f32-clamp/neon.c",
338    "src/f32-dwconv/gen/up4x9-neon.c",
339    "src/f32-dwconv/gen/up4x9-neon-acc2.c",
340    "src/f32-dwconv/gen/up8x9-neon.c",
341    "src/f32-dwconv/gen/up8x9-neon-acc2.c",
342    "src/f32-gavgpool-spchw/neon-x4.c",
343    "src/f32-gavgpool/mp7p7q-neon.c",
344    "src/f32-gavgpool/up7-neon.c",
345    "src/f32-gemm/gen/1x8-neon-lane-ld64.c",
346    "src/f32-gemm/gen/4x2-neon-lane-ld64.c",
347    "src/f32-gemm/gen/4x8-neon-lane-ld128.c",
348    "src/f32-gemm/gen/4x8-neon-lane-ld64.c",
349    "src/f32-gemm/gen/5x8-neon-lane-ld64.c",
350    "src/f32-gemm/gen/6x8-neon-lane-ld64.c",
351    "src/f32-gemm/gen/6x8-neon-lane-ld128.c",
352    "src/f32-gemm/gen/1x8-neon-dup-ld64.c",
353    "src/f32-gemm/gen/4x8-neon-dup-ld128.c",
354    "src/f32-gemm/gen/4x8-neon-dup-ld64.c",
355    "src/f32-gemm/gen/6x8-neon-dup-ld64.c",
356    "src/f32-gemm/gen/6x8-neon-dup-ld128.c",
357    "src/f32-gemm/gen/1x8s4-neon.c",
358    "src/f32-gemm/gen/4x8s4-neon.c",
359    "src/f32-gemm/gen/6x8s4-neon.c",
360    "src/f32-gemm/gen/8x8s4-neon.c",
361    "src/f32-gemm/gen-inc/1x8-neon-lane-ld64.c",
362    "src/f32-gemm/gen-inc/4x8-neon-lane-ld128.c",
363    "src/f32-gemm/gen-inc/4x8-neon-lane-ld64.c",
364    "src/f32-gemm/gen-inc/5x8-neon-lane-ld64.c",
365    "src/f32-gemm/gen-inc/6x8-neon-lane-ld64.c",
366    "src/f32-gemm/gen-inc/6x8-neon-lane-ld128.c",
367    "src/f32-gemm/gen-inc/1x8-neon-dup-ld64.c",
368    "src/f32-gemm/gen-inc/4x8-neon-dup-ld128.c",
369    "src/f32-gemm/gen-inc/4x8-neon-dup-ld64.c",
370    "src/f32-gemm/gen-inc/6x8-neon-dup-ld64.c",
371    "src/f32-gemm/gen-inc/6x8-neon-dup-ld128.c",
372    "src/f32-gemm/gen-inc/1x8s4-neon.c",
373    "src/f32-gemm/gen-inc/4x8s4-neon.c",
374    "src/f32-gemm/gen-inc/6x8s4-neon.c",
375    "src/f32-gemm/gen-inc/8x8s4-neon.c",
376    "src/f32-hswish/gen/neon-x4.c",
377    "src/f32-hswish/gen/neon-x8.c",
378    "src/f32-igemm/gen/1x8-neon-lane-ld64.c",
379    "src/f32-igemm/gen/4x2-neon-lane-ld64.c",
380    "src/f32-igemm/gen/4x4-neon-lane-ld64.c",
381    "src/f32-igemm/gen/4x8-neon-lane-ld128.c",
382    "src/f32-igemm/gen/4x8-neon-lane-ld64.c",
383    "src/f32-igemm/gen/6x8-neon-lane-ld64.c",
384    "src/f32-igemm/gen/6x8-neon-lane-ld128.c",
385    "src/f32-igemm/gen/1x8-neon-dup-ld64.c",
386    "src/f32-igemm/gen/4x8-neon-dup-ld128.c",
387    "src/f32-igemm/gen/4x8-neon-dup-ld64.c",
388    "src/f32-igemm/gen/6x8-neon-dup-ld64.c",
389    "src/f32-igemm/gen/6x8-neon-dup-ld128.c",
390    "src/f32-igemm/gen/1x8s4-neon.c",
391    "src/f32-igemm/gen/4x8s4-neon.c",
392    "src/f32-igemm/gen/6x8s4-neon.c",
393    "src/f32-igemm/gen/8x8s4-neon.c",
394    "src/f32-pavgpool/mp9p8q-neon.c",
395    "src/f32-pavgpool/up9-neon.c",
396    "src/f32-ppmm/gen/4x8-neon.c",
397    "src/f32-ppmm/gen/8x8-neon.c",
398    "src/f32-prelu/gen/neon-2x4.c",
399    "src/f32-prelu/gen/neon-2x8.c",
400    "src/f32-raddstoreexpminusmax/gen/neon-p5-x4.c",
401    "src/f32-raddstoreexpminusmax/gen/neon-p5-x8.c",
402    "src/f32-raddstoreexpminusmax/gen/neon-p5-x8-acc2.c",
403    "src/f32-raddstoreexpminusmax/gen/neon-p5-x12.c",
404    "src/f32-raddstoreexpminusmax/gen/neon-p5-x12-acc2.c",
405    "src/f32-raddstoreexpminusmax/gen/neon-p5-x12-acc3.c",
406    "src/f32-raddstoreexpminusmax/gen/neon-p5-x16.c",
407    "src/f32-raddstoreexpminusmax/gen/neon-p5-x16-acc2.c",
408    "src/f32-raddstoreexpminusmax/gen/neon-p5-x16-acc4.c",
409    "src/f32-raddstoreexpminusmax/gen/neon-p5-x20.c",
410    "src/f32-raddstoreexpminusmax/gen/neon-p5-x20-acc2.c",
411    "src/f32-raddstoreexpminusmax/gen/neon-p5-x20-acc5.c",
412    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x4.c",
413    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x8.c",
414    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x8-acc2.c",
415    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x12.c",
416    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x12-acc2.c",
417    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x12-acc3.c",
418    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x16.c",
419    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x16-acc2.c",
420    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x16-acc4.c",
421    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20.c",
422    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20-acc2.c",
423    "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20-acc5.c",
424    "src/f32-rmax/neon.c",
425    "src/f32-sigmoid/gen/neon-frac-p9-p10-nr1recps-x16.c",
426    "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x4.c",
427    "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x8.c",
428    "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x12.c",
429    "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x16.c",
430    "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x20.c",
431    "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x24.c",
432    "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x4.c",
433    "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x8.c",
434    "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x12.c",
435    "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x16.c",
436    "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x20.c",
437    "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x24.c",
438    "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x4.c",
439    "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x8.c",
440    "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x12.c",
441    "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x16.c",
442    "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x20.c",
443    "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x24.c",
444    "src/f32-vbinary/gen/vadd-neon-x4.c",
445    "src/f32-vbinary/gen/vadd-neon-x8.c",
446    "src/f32-vbinary/gen/vaddc-neon-x4.c",
447    "src/f32-vbinary/gen/vaddc-neon-x8.c",
448    "src/f32-vbinary/gen/vmax-neon-x4.c",
449    "src/f32-vbinary/gen/vmax-neon-x8.c",
450    "src/f32-vbinary/gen/vmaxc-neon-x4.c",
451    "src/f32-vbinary/gen/vmaxc-neon-x8.c",
452    "src/f32-vbinary/gen/vmin-neon-x4.c",
453    "src/f32-vbinary/gen/vmin-neon-x8.c",
454    "src/f32-vbinary/gen/vminc-neon-x4.c",
455    "src/f32-vbinary/gen/vminc-neon-x8.c",
456    "src/f32-vbinary/gen/vmul-neon-x4.c",
457    "src/f32-vbinary/gen/vmul-neon-x8.c",
458    "src/f32-vbinary/gen/vmulc-neon-x4.c",
459    "src/f32-vbinary/gen/vmulc-neon-x8.c",
460    "src/f32-vbinary/gen/vrsubc-neon-x4.c",
461    "src/f32-vbinary/gen/vrsubc-neon-x8.c",
462    "src/f32-vbinary/gen/vsub-neon-x4.c",
463    "src/f32-vbinary/gen/vsub-neon-x8.c",
464    "src/f32-vbinary/gen/vsubc-neon-x4.c",
465    "src/f32-vbinary/gen/vsubc-neon-x8.c",
466    "src/f32-vmulcaddc/gen/c4-neon-2x.c",
467    "src/f32-vmulcaddc/gen/c8-neon-2x.c",
468    "src/q8-avgpool/mp9p8q-neon.c",
469    "src/q8-avgpool/up9-neon.c",
470    "src/q8-dwconv/up8x9-neon.c",
471    "src/q8-gavgpool/mp7p7q-neon.c",
472    "src/q8-gavgpool/up7-neon.c",
473    "src/q8-gemm/4x8-neon.c",
474    "src/q8-gemm/8x8-neon.c",
475    "src/q8-igemm/4x8-neon.c",
476    "src/q8-igemm/8x8-neon.c",
477    "src/q8-vadd/neon.c",
478    "src/u8-clamp/neon.c",
479    "src/u8-maxpool/9p8x-neon-c16.c",
480    "src/u8-rmax/neon.c",
481    "src/x32-packx/x4-neon-st4.c",
482    "src/x32-pad/x2-neon.c",
483    "src/x32-zip/x2-neon.c",
484    "src/x32-zip/x3-neon.c",
485    "src/x32-zip/x4-neon.c",
486    "src/x32-zip/xm-neon.c",
487    "src/x8-zip/x2-neon.c",
488    "src/x8-zip/x3-neon.c",
489    "src/x8-zip/x4-neon.c",
490    "src/x8-zip/xm-neon.c",
491    "src/math/sigmoid-neon-frac-p9-p10-nr1recps.c",
492    "src/math/sigmoid-neon-rr1-lut2048-p1-nr2recps.c",
493    "src/math/sigmoid-neon-rr1-lut64-p2-nr2recps.c",
494    "src/math/sigmoid-neon-rr1-p5-nr2recps.c",
495    "src/math/sigmoid-neon-rr2-lut2048-p1-nr2recps.c",
496    "src/math/sigmoid-neon-rr2-lut64-p2-nr2recps.c",
497    "src/math/sigmoid-neon-rr2-p5-nr2recps.c",
498]
499
// NEONFMA_UKERNELS: micro-kernel C sources whose file names all carry the
// `neonfma` tag: f32-* kernel families first, then the src/math/ exp,
// expminus and sigmoid variants.
// NOTE(review): presumably requires a target with NEON fused multiply-add
// support - confirm against the compile flags of the consuming build rule.
500NEONFMA_UKERNELS = [
501    "src/f32-bilinear/gen/neonfma-c4.c",
502    "src/f32-bilinear/gen/neonfma-c8.c",
503    "src/f32-igemm/gen/1x8-neonfma-dup-ld64.c",
504    "src/f32-igemm/gen/4x8-neonfma-dup-ld128.c",
505    "src/f32-igemm/gen/4x8-neonfma-dup-ld64.c",
506    "src/f32-igemm/gen/6x8-neonfma-dup-ld64.c",
507    "src/f32-igemm/gen/6x8-neonfma-dup-ld128.c",
508    "src/f32-igemm/gen/1x8s4-neonfma.c",
509    "src/f32-igemm/gen/4x8s4-neonfma.c",
510    "src/f32-igemm/gen/6x8s4-neonfma.c",
511    "src/f32-igemm/gen/8x8s4-neonfma.c",
512    "src/f32-dwconv/gen/up4x9-neonfma.c",
513    "src/f32-dwconv/gen/up4x9-neonfma-acc2.c",
514    "src/f32-dwconv/gen/up8x9-neonfma.c",
515    "src/f32-dwconv/gen/up8x9-neonfma-acc2.c",
516    "src/f32-gemm/gen/1x8-neonfma-dup-ld64.c",
517    "src/f32-gemm/gen/4x8-neonfma-dup-ld128.c",
518    "src/f32-gemm/gen/4x8-neonfma-dup-ld64.c",
519    "src/f32-gemm/gen/6x8-neonfma-dup-ld64.c",
520    "src/f32-gemm/gen/6x8-neonfma-dup-ld128.c",
521    "src/f32-gemm/gen/1x8s4-neonfma.c",
522    "src/f32-gemm/gen/4x8s4-neonfma.c",
523    "src/f32-gemm/gen/6x8s4-neonfma.c",
524    "src/f32-gemm/gen/8x8s4-neonfma.c",
525    "src/f32-gemm/gen-inc/1x8-neonfma-dup-ld64.c",
526    "src/f32-gemm/gen-inc/4x8-neonfma-dup-ld128.c",
527    "src/f32-gemm/gen-inc/4x8-neonfma-dup-ld64.c",
528    "src/f32-gemm/gen-inc/6x8-neonfma-dup-ld64.c",
529    "src/f32-gemm/gen-inc/6x8-neonfma-dup-ld128.c",
530    "src/f32-gemm/gen-inc/1x8s4-neonfma.c",
531    "src/f32-gemm/gen-inc/4x8s4-neonfma.c",
532    "src/f32-gemm/gen-inc/6x8s4-neonfma.c",
533    "src/f32-gemm/gen-inc/8x8s4-neonfma.c",
534    "src/f32-hswish/gen/neonfma-x4.c",
535    "src/f32-hswish/gen/neonfma-x8.c",
536    "src/f32-ppmm/gen/4x8-neonfma.c",
537    "src/f32-ppmm/gen/8x8-neonfma.c",
538    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x4.c",
539    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x8.c",
540    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x8-acc2.c",
541    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x12.c",
542    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x12-acc2.c",
543    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x12-acc3.c",
544    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x16.c",
545    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x16-acc2.c",
546    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x16-acc4.c",
547    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x20.c",
548    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x20-acc2.c",
549    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x20-acc5.c",
550    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x4.c",
551    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x8.c",
552    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x8-acc2.c",
553    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x12.c",
554    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x12-acc2.c",
555    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x12-acc3.c",
556    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x16.c",
557    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x16-acc2.c",
558    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x16-acc4.c",
559    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x20.c",
560    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x20-acc2.c",
561    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x20-acc5.c",
562    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x4.c",
563    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x8.c",
564    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x12.c",
565    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x16.c",
566    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x20.c",
567    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x24.c",
568    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x4.c",
569    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x8.c",
570    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x12.c",
571    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x16.c",
572    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x20.c",
573    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x24.c",
574    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x4.c",
575    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x8.c",
576    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x12.c",
577    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x16.c",
578    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x20.c",
579    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x24.c",
580    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x4.c",
581    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x8.c",
582    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x12.c",
583    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x16.c",
584    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x20.c",
585    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x24.c",
586    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x4.c",
587    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x8.c",
588    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x12.c",
589    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x16.c",
590    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x20.c",
591    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x24.c",
592    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x4.c",
593    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x8.c",
594    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x12.c",
595    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x16.c",
596    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x20.c",
597    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x24.c",
598    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x4.c",
599    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x8.c",
600    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x12.c",
601    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x16.c",
602    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x20.c",
603    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x24.c",
604    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x4.c",
605    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c",
606    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c",
607    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x16.c",
608    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x20.c",
609    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c",
610    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x4.c",
611    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x8.c",
612    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x12.c",
613    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x16.c",
614    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x20.c",
615    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x24.c",
616    "src/f32-vmulcaddc/gen/c4-neonfma-2x.c",
617    "src/f32-vmulcaddc/gen/c8-neonfma-2x.c",
618    "src/math/exp-neonfma-lut64-p2.c",
619    "src/math/exp-neonfma-p5.c",
620    "src/math/expminus-neonfma-lut2048-p1.c",
621    "src/math/expminus-neonfma-lut64-p2.c",
622    "src/math/expminus-neonfma-p5.c",
623    "src/math/sigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma.c",
624    "src/math/sigmoid-neonfma-rr1-lut2048-p1-nr2fma.c",
625    "src/math/sigmoid-neonfma-rr1-lut2048-p1-nr2recps.c",
626    "src/math/sigmoid-neonfma-rr1-lut64-p2-nr1recps1fma.c",
627    "src/math/sigmoid-neonfma-rr1-lut64-p2-nr2fma.c",
628    "src/math/sigmoid-neonfma-rr1-lut64-p2-nr2recps.c",
629    "src/math/sigmoid-neonfma-rr1-p5-nr1recps1fma.c",
630    "src/math/sigmoid-neonfma-rr1-p5-nr2fma.c",
631    "src/math/sigmoid-neonfma-rr1-p5-nr2recps.c",
632    "src/math/sigmoid-neonfma-rr2-lut2048-p1-nr1recps1fma.c",
633    "src/math/sigmoid-neonfma-rr2-lut2048-p1-nr2fma.c",
634    "src/math/sigmoid-neonfma-rr2-lut2048-p1-nr2recps.c",
635    "src/math/sigmoid-neonfma-rr2-lut64-p2-nr1recps1fma.c",
636    "src/math/sigmoid-neonfma-rr2-lut64-p2-nr2fma.c",
637    "src/math/sigmoid-neonfma-rr2-lut64-p2-nr2recps.c",
638    "src/math/sigmoid-neonfma-rr2-p5-nr1recps1fma.c",
639    "src/math/sigmoid-neonfma-rr2-p5-nr2fma.c",
640    "src/math/sigmoid-neonfma-rr2-p5-nr2recps.c",
641]
642
// C micro-kernel sources grouped under the AArch64 NEON-FMA label.
// Contents, in order: f32 vector-division kernels (vdiv/vdivc/vrdivc),
// f32 GEMM and GEMM-inc, f32 IGEMM, conv-hwc / conv-hwc2spchw,
// dwconv-spchw, the "-div" f32 sigmoid variants, f32 SpMM, and the
// "-div" sigmoid files under src/math.
// NOTE(review): the vdiv*/vrdivc* files carry a plain "neon" suffix but
// are listed here rather than in a generic NEON list -- presumably because
// they need AArch64-only instructions; confirm before moving them.
// NOTE(review): the module that consumes this list is outside this view.
643AARCH64_NEONFMA_UKERNELS = [
644    "src/f32-vbinary/gen/vdiv-neon-x4.c",
645    "src/f32-vbinary/gen/vdiv-neon-x8.c",
646    "src/f32-vbinary/gen/vdivc-neon-x4.c",
647    "src/f32-vbinary/gen/vdivc-neon-x8.c",
648    "src/f32-vbinary/gen/vrdivc-neon-x4.c",
649    "src/f32-vbinary/gen/vrdivc-neon-x8.c",
650    "src/f32-gemm/gen/1x8-neonfma-lane-ld64.c",
651    "src/f32-gemm/gen/4x2-neonfma-lane-ld64.c",
652    "src/f32-gemm/gen/4x8-neonfma-lane-ld128.c",
653    "src/f32-gemm/gen/4x8-neonfma-lane-ld64.c",
654    "src/f32-gemm/gen/5x8-neonfma-lane-ld64.c",
655    "src/f32-gemm/gen/6x8-neonfma-lane-ld64.c",
656    "src/f32-gemm/gen/6x8-neonfma-lane-ld128.c",
657    "src/f32-gemm/gen-inc/1x8-neonfma-lane-ld64.c",
658    "src/f32-gemm/gen-inc/4x8-neonfma-lane-ld128.c",
659    "src/f32-gemm/gen-inc/4x8-neonfma-lane-ld64.c",
660    "src/f32-gemm/gen-inc/5x8-neonfma-lane-ld64.c",
661    "src/f32-gemm/gen-inc/6x8-neonfma-lane-ld64.c",
662    "src/f32-gemm/gen-inc/6x8-neonfma-lane-ld128.c",
663    "src/f32-igemm/gen/1x8-neonfma-lane-ld64.c",
664    "src/f32-igemm/gen/4x2-neonfma-lane-ld64.c",
665    "src/f32-igemm/gen/4x4-neonfma-lane-ld64.c",
666    "src/f32-igemm/gen/4x8-neonfma-lane-ld128.c",
667    "src/f32-igemm/gen/4x8-neonfma-lane-ld64.c",
668    "src/f32-igemm/gen/6x8-neonfma-lane-ld64.c",
669    "src/f32-igemm/gen/6x8-neonfma-lane-ld128.c",
670    "src/f32-conv-hwc/3x3s2p1c3x4-neonfma-2x2.c",
671    "src/f32-conv-hwc/3x3s2p1c3x8-neonfma-2x2.c",
672    "src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c",
673    "src/f32-dwconv-spchw/3x3p1-neonfma.c",
674    "src/f32-dwconv-spchw/5x5p2-neonfma.c",
675    "src/f32-dwconv-spchw/3x3s2p1-neonfma.c",
676    "src/f32-dwconv-spchw/5x5s2p2-neonfma.c",
677    "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x4.c",
678    "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x8.c",
679    "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x12.c",
680    "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x16.c",
681    "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x20.c",
682    "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x24.c",
683    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x4.c",
684    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x8.c",
685    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x12.c",
686    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x16.c",
687    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x20.c",
688    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x24.c",
689    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x4.c",
690    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x8.c",
691    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x12.c",
692    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x16.c",
693    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x20.c",
694    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x24.c",
695    "src/f32-spmm/gen/12x1-neonfma.c",
696    "src/f32-spmm/gen/12x2-neonfma.c",
697    "src/f32-spmm/gen/12x4-neonfma.c",
698    "src/f32-spmm/gen/16x1-neonfma-pipelined.c",
699    "src/f32-spmm/gen/16x1-neonfma-unroll2.c",
700    "src/f32-spmm/gen/16x1-neonfma.c",
701    "src/f32-spmm/gen/16x2-neonfma.c",
702    "src/f32-spmm/gen/16x4-neonfma.c",
703    "src/f32-spmm/gen/4x1-neonfma-pipelined.c",
704    "src/f32-spmm/gen/4x1-neonfma-unroll2.c",
705    "src/f32-spmm/gen/4x1-neonfma.c",
706    "src/f32-spmm/gen/4x2-neonfma.c",
707    "src/f32-spmm/gen/4x4-neonfma.c",
708    "src/f32-spmm/gen/8x1-neonfma-pipelined.c",
709    "src/f32-spmm/gen/8x1-neonfma-unroll2.c",
710    "src/f32-spmm/gen/8x1-neonfma.c",
711    "src/f32-spmm/gen/8x2-neonfma.c",
712    "src/f32-spmm/gen/8x4-neonfma.c",
713    "src/math/sigmoid-neonfma-rr1-lut2048-p1-div.c",
714    "src/math/sigmoid-neonfma-rr1-lut64-p2-div.c",
715    "src/math/sigmoid-neonfma-rr1-p5-div.c",
716    "src/math/sigmoid-neonfma-rr2-lut2048-p1-div.c",
717    "src/math/sigmoid-neonfma-rr2-lut64-p2-div.c",
718    "src/math/sigmoid-neonfma-rr2-p5-div.c",
719]
720
// Half-precision (f16) GEMM micro-kernel sources grouped under the
// AArch64 NEON FP16-arithmetic label: 4x8, 6x8 and 8x8 "ld64" variants.
// NOTE(review): the consuming module and the compiler flags it needs
// (e.g. an fp16 -march option) are not visible here -- confirm downstream.
721AARCH64_NEONFP16ARITH_UKERNELS = [
722    "src/f16-gemm/gen/4x8-neonfp16arith-ld64.c",
723    "src/f16-gemm/gen/6x8-neonfp16arith-ld64.c",
724    "src/f16-gemm/gen/8x8-neonfp16arith-ld64.c",
725]
726
// Micro-kernel sources grouped under the x86 SSE label.
// Every entry is an f32 kernel (avgpool, bilinear, clamp, dwconv-spchw,
// dwconv, gavgpool, gemm / gemm-inc, hswish, igemm, maxpool, pavgpool,
// ppmm, rmax, spmm, vbinary, vmulcaddc) except the final x32-packx file.
// NOTE(review): the module that compiles this list is outside this view.
727SSE_UKERNELS = [
728    "src/f32-avgpool/mp9p8q-sse.c",
729    "src/f32-avgpool/up9-sse.c",
730    "src/f32-bilinear/gen/sse-c4.c",
731    "src/f32-bilinear/gen/sse-c8.c",
732    "src/f32-clamp/sse.c",
733    "src/f32-dwconv-spchw/3x3p1-sse.c",
734    "src/f32-dwconv-spchw/3x3s2p1-sse.c",
735    "src/f32-dwconv/gen/up4x25-sse-acc2.c",
736    "src/f32-dwconv/gen/up4x25-sse.c",
737    "src/f32-dwconv/gen/up4x4-sse-acc2.c",
738    "src/f32-dwconv/gen/up4x4-sse.c",
739    "src/f32-dwconv/gen/up4x9-sse-acc2.c",
740    "src/f32-dwconv/gen/up4x9-sse.c",
741    "src/f32-dwconv/gen/up8x25-sse-acc2.c",
742    "src/f32-dwconv/gen/up8x25-sse.c",
743    "src/f32-dwconv/gen/up8x4-sse-acc2.c",
744    "src/f32-dwconv/gen/up8x4-sse.c",
745    "src/f32-dwconv/gen/up8x9-sse-acc2.c",
746    "src/f32-dwconv/gen/up8x9-sse.c",
747    "src/f32-gavgpool-spchw/sse-x4.c",
748    "src/f32-gavgpool/mp7p7q-sse.c",
749    "src/f32-gavgpool/up7-sse.c",
750    "src/f32-gemm/gen/1x8-sse-dup.c",
751    "src/f32-gemm/gen/1x8-sse-load1.c",
752    "src/f32-gemm/gen/1x8s4-sse.c",
753    "src/f32-gemm/gen/4x8-sse-dup.c",
754    "src/f32-gemm/gen/4x8-sse-load1.c",
755    "src/f32-gemm/gen/4x8s4-sse.c",
756    "src/f32-gemm/gen-inc/1x8-sse-dup.c",
757    "src/f32-gemm/gen-inc/1x8-sse-load1.c",
758    "src/f32-gemm/gen-inc/1x8s4-sse.c",
759    "src/f32-gemm/gen-inc/4x8-sse-dup.c",
760    "src/f32-gemm/gen-inc/4x8-sse-load1.c",
761    "src/f32-gemm/gen-inc/4x8s4-sse.c",
762    "src/f32-hswish/gen/sse-x4.c",
763    "src/f32-hswish/gen/sse-x8.c",
764    "src/f32-igemm/gen/1x8-sse-dup.c",
765    "src/f32-igemm/gen/1x8-sse-load1.c",
766    "src/f32-igemm/gen/1x8s4-sse.c",
767    "src/f32-igemm/gen/4x2c4-sse.c",
768    "src/f32-igemm/gen/4x8-sse-dup.c",
769    "src/f32-igemm/gen/4x8-sse-load1.c",
770    "src/f32-igemm/gen/4x8s4-sse.c",
771    "src/f32-maxpool/9p8x-sse-c4.c",
772    "src/f32-pavgpool/mp9p8q-sse.c",
773    "src/f32-pavgpool/up9-sse.c",
774    "src/f32-ppmm/gen/4x8-sse.c",
775    "src/f32-rmax/sse.c",
776    "src/f32-spmm/gen/4x1-sse.c",
777    "src/f32-spmm/gen/8x1-sse.c",
778    "src/f32-vbinary/gen/vadd-sse-x4.c",
779    "src/f32-vbinary/gen/vadd-sse-x8.c",
780    "src/f32-vbinary/gen/vaddc-sse-x4.c",
781    "src/f32-vbinary/gen/vaddc-sse-x8.c",
782    "src/f32-vbinary/gen/vdiv-sse-x4.c",
783    "src/f32-vbinary/gen/vdiv-sse-x8.c",
784    "src/f32-vbinary/gen/vdivc-sse-x4.c",
785    "src/f32-vbinary/gen/vdivc-sse-x8.c",
786    "src/f32-vbinary/gen/vmax-sse-x4.c",
787    "src/f32-vbinary/gen/vmax-sse-x8.c",
788    "src/f32-vbinary/gen/vmaxc-sse-x4.c",
789    "src/f32-vbinary/gen/vmaxc-sse-x8.c",
790    "src/f32-vbinary/gen/vmin-sse-x4.c",
791    "src/f32-vbinary/gen/vmin-sse-x8.c",
792    "src/f32-vbinary/gen/vminc-sse-x4.c",
793    "src/f32-vbinary/gen/vminc-sse-x8.c",
794    "src/f32-vbinary/gen/vmul-sse-x4.c",
795    "src/f32-vbinary/gen/vmul-sse-x8.c",
796    "src/f32-vbinary/gen/vmulc-sse-x4.c",
797    "src/f32-vbinary/gen/vmulc-sse-x8.c",
798    "src/f32-vbinary/gen/vrdivc-sse-x4.c",
799    "src/f32-vbinary/gen/vrdivc-sse-x8.c",
800    "src/f32-vbinary/gen/vrsubc-sse-x4.c",
801    "src/f32-vbinary/gen/vrsubc-sse-x8.c",
802    "src/f32-vbinary/gen/vsub-sse-x4.c",
803    "src/f32-vbinary/gen/vsub-sse-x8.c",
804    "src/f32-vbinary/gen/vsubc-sse-x4.c",
805    "src/f32-vbinary/gen/vsubc-sse-x8.c",
806    "src/f32-vmulcaddc/gen/c4-sse-2x.c",
807    "src/f32-vmulcaddc/gen/c8-sse-2x.c",
808    "src/x32-packx/x4-sse.c",
809]
810
// Micro-kernel sources grouped under the x86 SSE2 label.
// Mixes several data types: f32 (argmaxpool, prelu, raddstoreexpminusmax,
// sigmoid), q8 (avgpool, igemm, dwconv, gavgpool, gemm, vadd), u8 (clamp,
// maxpool, rmax), x32 / x8 data-movement kernels (pad, zip), and the
// SSE2 files under src/math (exp, expminus, sigmoid).
// NOTE(review): the module that compiles this list is outside this view.
811SSE2_UKERNELS = [
812    "src/f32-argmaxpool/9p8x-sse2-c4.c",
813    "src/f32-argmaxpool/4x-sse2-c4.c",
814    "src/f32-argmaxpool/9x-sse2-c4.c",
815    "src/f32-prelu/gen/sse2-2x4.c",
816    "src/f32-prelu/gen/sse2-2x8.c",
817    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x4.c",
818    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x8.c",
819    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x8-acc2.c",
820    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x12.c",
821    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x12-acc2.c",
822    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x12-acc3.c",
823    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x16.c",
824    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x16-acc2.c",
825    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x16-acc4.c",
826    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x20.c",
827    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x20-acc2.c",
828    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x20-acc5.c",
829    "src/f32-sigmoid/gen/sse2-p5-div-x4.c",
830    "src/f32-sigmoid/gen/sse2-p5-div-x8.c",
831    "src/f32-sigmoid/gen/sse2-p5-div-x12.c",
832    "src/f32-sigmoid/gen/sse2-p5-div-x16.c",
833    "src/f32-sigmoid/gen/sse2-p5-div-x20.c",
834    "src/f32-sigmoid/gen/sse2-p5-div-x24.c",
835    "src/q8-avgpool/mp9p8q-sse2.c",
836    "src/q8-avgpool/up9-sse2.c",
837    "src/q8-igemm/4x4c2-sse2.c",
838    "src/q8-dwconv/up8x9-sse2.c",
839    "src/q8-gavgpool/mp7p7q-sse2.c",
840    "src/q8-gavgpool/up7-sse2.c",
841    "src/q8-gemm/2x4c8-sse2.c",
842    "src/q8-gemm/4x4c2-sse2.c",
843    "src/q8-vadd/sse2.c",
844    "src/u8-clamp/sse2.c",
845    "src/u8-maxpool/9p8x-sse2-c16.c",
846    "src/u8-rmax/sse2.c",
847    "src/x32-pad/x2-sse2.c",
848    "src/x32-zip/x2-sse2.c",
849    "src/x32-zip/x3-sse2.c",
850    "src/x32-zip/x4-sse2.c",
851    "src/x32-zip/xm-sse2.c",
852    "src/x8-zip/x2-sse2.c",
853    "src/x8-zip/x3-sse2.c",
854    "src/x8-zip/x4-sse2.c",
855    "src/x8-zip/xm-sse2.c",
856    "src/math/exp-sse2-p5.c",
857    "src/math/expminus-sse2-p5.c",
858    "src/math/sigmoid-sse2-p5-div.c",
859]
860
// Micro-kernel sources grouped under the x86 SSE4.1 label: only f32 PReLU
// (2x4, 2x8) and the f32 sigmoid "p5-div" kernels (x4 through x24).
// NOTE(review): the module that compiles this list is outside this view.
861SSE41_UKERNELS = [
862    "src/f32-prelu/gen/sse41-2x4.c",
863    "src/f32-prelu/gen/sse41-2x8.c",
864    "src/f32-sigmoid/gen/sse41-p5-div-x4.c",
865    "src/f32-sigmoid/gen/sse41-p5-div-x8.c",
866    "src/f32-sigmoid/gen/sse41-p5-div-x12.c",
867    "src/f32-sigmoid/gen/sse41-p5-div-x16.c",
868    "src/f32-sigmoid/gen/sse41-p5-div-x20.c",
869    "src/f32-sigmoid/gen/sse41-p5-div-x24.c",
870]
871
// Micro-kernel sources grouped under the x86 AVX label. All entries are
// f32 kernels: clamp, dwconv, gemm / gemm-inc ("broadcast" variants),
// hswish, igemm, rmax, vbinary (x8 / x16) and vscale.
// NOTE(review): the module that compiles this list is outside this view.
872AVX_UKERNELS = [
873    "src/f32-clamp/avx.c",
874    "src/f32-dwconv/gen/up16x4-avx-acc2.c",
875    "src/f32-dwconv/gen/up16x4-avx.c",
876    "src/f32-dwconv/gen/up8x4-avx-acc2.c",
877    "src/f32-dwconv/gen/up8x4-avx.c",
878    "src/f32-dwconv/gen/up16x9-avx-acc2.c",
879    "src/f32-dwconv/gen/up16x9-avx.c",
880    "src/f32-dwconv/gen/up8x9-avx-acc2.c",
881    "src/f32-dwconv/gen/up8x9-avx.c",
882    "src/f32-dwconv/gen/up16x25-avx-acc2.c",
883    "src/f32-dwconv/gen/up16x25-avx.c",
884    "src/f32-dwconv/gen/up8x25-avx-acc2.c",
885    "src/f32-dwconv/gen/up8x25-avx.c",
886    "src/f32-gemm/gen/1x8-avx-broadcast.c",
887    "src/f32-gemm/gen/4x8-avx-broadcast.c",
888    "src/f32-gemm/gen/5x8-avx-broadcast.c",
889    "src/f32-gemm/gen/6x8-avx-broadcast.c",
890    "src/f32-gemm/gen/7x8-avx-broadcast.c",
891    "src/f32-gemm/gen/1x16-avx-broadcast.c",
892    "src/f32-gemm/gen/3x16-avx-broadcast.c",
893    "src/f32-gemm/gen/4x16-avx-broadcast.c",
894    "src/f32-gemm/gen/5x16-avx-broadcast.c",
895    "src/f32-gemm/gen-inc/1x8-avx-broadcast.c",
896    "src/f32-gemm/gen-inc/4x8-avx-broadcast.c",
897    "src/f32-gemm/gen-inc/5x8-avx-broadcast.c",
898    "src/f32-gemm/gen-inc/6x8-avx-broadcast.c",
899    "src/f32-gemm/gen-inc/7x8-avx-broadcast.c",
900    "src/f32-gemm/gen-inc/1x16-avx-broadcast.c",
901    "src/f32-gemm/gen-inc/3x16-avx-broadcast.c",
902    "src/f32-gemm/gen-inc/4x16-avx-broadcast.c",
903    "src/f32-gemm/gen-inc/5x16-avx-broadcast.c",
904    "src/f32-hswish/gen/avx-x8.c",
905    "src/f32-hswish/gen/avx-x16.c",
906    "src/f32-igemm/gen/1x8-avx-broadcast.c",
907    "src/f32-igemm/gen/4x8-avx-broadcast.c",
908    "src/f32-igemm/gen/5x8-avx-broadcast.c",
909    "src/f32-igemm/gen/6x8-avx-broadcast.c",
910    "src/f32-igemm/gen/7x8-avx-broadcast.c",
911    "src/f32-igemm/gen/1x16-avx-broadcast.c",
912    "src/f32-igemm/gen/3x16-avx-broadcast.c",
913    "src/f32-igemm/gen/4x16-avx-broadcast.c",
914    "src/f32-igemm/gen/5x16-avx-broadcast.c",
915    "src/f32-rmax/avx.c",
916    "src/f32-vbinary/gen/vadd-avx-x8.c",
917    "src/f32-vbinary/gen/vadd-avx-x16.c",
918    "src/f32-vbinary/gen/vaddc-avx-x8.c",
919    "src/f32-vbinary/gen/vaddc-avx-x16.c",
920    "src/f32-vbinary/gen/vdiv-avx-x8.c",
921    "src/f32-vbinary/gen/vdiv-avx-x16.c",
922    "src/f32-vbinary/gen/vdivc-avx-x8.c",
923    "src/f32-vbinary/gen/vdivc-avx-x16.c",
924    "src/f32-vbinary/gen/vmax-avx-x8.c",
925    "src/f32-vbinary/gen/vmax-avx-x16.c",
926    "src/f32-vbinary/gen/vmaxc-avx-x8.c",
927    "src/f32-vbinary/gen/vmaxc-avx-x16.c",
928    "src/f32-vbinary/gen/vmin-avx-x8.c",
929    "src/f32-vbinary/gen/vmin-avx-x16.c",
930    "src/f32-vbinary/gen/vminc-avx-x8.c",
931    "src/f32-vbinary/gen/vminc-avx-x16.c",
932    "src/f32-vbinary/gen/vmul-avx-x8.c",
933    "src/f32-vbinary/gen/vmul-avx-x16.c",
934    "src/f32-vbinary/gen/vmulc-avx-x8.c",
935    "src/f32-vbinary/gen/vmulc-avx-x16.c",
936    "src/f32-vbinary/gen/vrdivc-avx-x8.c",
937    "src/f32-vbinary/gen/vrdivc-avx-x16.c",
938    "src/f32-vbinary/gen/vrsubc-avx-x8.c",
939    "src/f32-vbinary/gen/vrsubc-avx-x16.c",
940    "src/f32-vbinary/gen/vsub-avx-x8.c",
941    "src/f32-vbinary/gen/vsub-avx-x16.c",
942    "src/f32-vbinary/gen/vsubc-avx-x8.c",
943    "src/f32-vbinary/gen/vsubc-avx-x16.c",
944    "src/f32-vscale/avx-unroll32.c",
945]
946
// Micro-kernel sources grouped under the x86 FMA3 label. All entries are
// f32 kernels: dwconv, gemm / gemm-inc and igemm ("broadcast" variants,
// including the 16-wide "s4" forms), and hswish.
// NOTE(review): the module that compiles this list is outside this view.
947FMA3_UKERNELS = [
948    "src/f32-dwconv/gen/up16x4-fma3-acc2.c",
949    "src/f32-dwconv/gen/up16x4-fma3.c",
950    "src/f32-dwconv/gen/up8x4-fma3-acc2.c",
951    "src/f32-dwconv/gen/up8x4-fma3.c",
952    "src/f32-dwconv/gen/up16x9-fma3-acc2.c",
953    "src/f32-dwconv/gen/up16x9-fma3.c",
954    "src/f32-dwconv/gen/up8x9-fma3-acc2.c",
955    "src/f32-dwconv/gen/up8x9-fma3.c",
956    "src/f32-dwconv/gen/up16x25-fma3-acc2.c",
957    "src/f32-dwconv/gen/up16x25-fma3.c",
958    "src/f32-dwconv/gen/up8x25-fma3-acc2.c",
959    "src/f32-dwconv/gen/up8x25-fma3.c",
960    "src/f32-gemm/gen/1x8-fma3-broadcast.c",
961    "src/f32-gemm/gen/4x8-fma3-broadcast.c",
962    "src/f32-gemm/gen/5x8-fma3-broadcast.c",
963    "src/f32-gemm/gen/6x8-fma3-broadcast.c",
964    "src/f32-gemm/gen/7x8-fma3-broadcast.c",
965    "src/f32-gemm/gen/8x8-fma3-broadcast.c",
966    "src/f32-gemm/gen/1x16-fma3-broadcast.c",
967    "src/f32-gemm/gen/3x16-fma3-broadcast.c",
968    "src/f32-gemm/gen/4x16-fma3-broadcast.c",
969    "src/f32-gemm/gen/5x16-fma3-broadcast.c",
970    "src/f32-gemm/gen/1x16s4-fma3-broadcast.c",
971    "src/f32-gemm/gen/3x16s4-fma3-broadcast.c",
972    "src/f32-gemm/gen/4x16s4-fma3-broadcast.c",
973    "src/f32-gemm/gen/5x16s4-fma3-broadcast.c",
974    "src/f32-gemm/gen-inc/1x8-fma3-broadcast.c",
975    "src/f32-gemm/gen-inc/4x8-fma3-broadcast.c",
976    "src/f32-gemm/gen-inc/5x8-fma3-broadcast.c",
977    "src/f32-gemm/gen-inc/6x8-fma3-broadcast.c",
978    "src/f32-gemm/gen-inc/7x8-fma3-broadcast.c",
979    "src/f32-gemm/gen-inc/8x8-fma3-broadcast.c",
980    "src/f32-gemm/gen-inc/1x16-fma3-broadcast.c",
981    "src/f32-gemm/gen-inc/3x16-fma3-broadcast.c",
982    "src/f32-gemm/gen-inc/4x16-fma3-broadcast.c",
983    "src/f32-gemm/gen-inc/5x16-fma3-broadcast.c",
984    "src/f32-gemm/gen-inc/1x16s4-fma3-broadcast.c",
985    "src/f32-gemm/gen-inc/3x16s4-fma3-broadcast.c",
986    "src/f32-gemm/gen-inc/4x16s4-fma3-broadcast.c",
987    "src/f32-gemm/gen-inc/5x16s4-fma3-broadcast.c",
988    "src/f32-hswish/gen/fma3-x8.c",
989    "src/f32-hswish/gen/fma3-x16.c",
990    "src/f32-igemm/gen/1x8-fma3-broadcast.c",
991    "src/f32-igemm/gen/4x8-fma3-broadcast.c",
992    "src/f32-igemm/gen/5x8-fma3-broadcast.c",
993    "src/f32-igemm/gen/6x8-fma3-broadcast.c",
994    "src/f32-igemm/gen/7x8-fma3-broadcast.c",
995    "src/f32-igemm/gen/8x8-fma3-broadcast.c",
996    "src/f32-igemm/gen/1x16-fma3-broadcast.c",
997    "src/f32-igemm/gen/3x16-fma3-broadcast.c",
998    "src/f32-igemm/gen/4x16-fma3-broadcast.c",
999    "src/f32-igemm/gen/5x16-fma3-broadcast.c",
1000    "src/f32-igemm/gen/1x16s4-fma3-broadcast.c",
1001    "src/f32-igemm/gen/3x16s4-fma3-broadcast.c",
1002    "src/f32-igemm/gen/4x16s4-fma3-broadcast.c",
1003    "src/f32-igemm/gen/5x16s4-fma3-broadcast.c",
1004]
1005
// Micro-kernel sources grouped under the x86 AVX2 label. All entries are
// f32 exponential-family kernels: raddexpminusmax, raddextexp,
// raddstoreexpminusmax, sigmoid (div / nr1fma / nr2fma variants),
// vscaleexpminusmax, vscaleextexp, plus the AVX2 files under src/math
// (exp, expminus, extexp, sigmoid).
// NOTE(review): the module that compiles this list is outside this view.
1006AVX2_UKERNELS = [
1007    "src/f32-raddexpminusmax/gen/avx2-p5-x64.c",
1008    "src/f32-raddexpminusmax/gen/avx2-p5-x64-acc2.c",
1009    "src/f32-raddexpminusmax/gen/avx2-p5-x64-acc4.c",
1010    "src/f32-raddexpminusmax/gen/avx2-p5-x72.c",
1011    "src/f32-raddexpminusmax/gen/avx2-p5-x72-acc3.c",
1012    "src/f32-raddexpminusmax/gen/avx2-p5-x80.c",
1013    "src/f32-raddexpminusmax/gen/avx2-p5-x80-acc2.c",
1014    "src/f32-raddexpminusmax/gen/avx2-p5-x80-acc5.c",
1015    "src/f32-raddexpminusmax/gen/avx2-p5-x96.c",
1016    "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc2.c",
1017    "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc3.c",
1018    "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc6.c",
1019    "src/f32-raddextexp/gen/avx2-p5-x64.c",
1020    "src/f32-raddextexp/gen/avx2-p5-x64-acc2.c",
1021    "src/f32-raddextexp/gen/avx2-p5-x64-acc4.c",
1022    "src/f32-raddextexp/gen/avx2-p5-x72.c",
1023    "src/f32-raddextexp/gen/avx2-p5-x72-acc3.c",
1024    "src/f32-raddextexp/gen/avx2-p5-x80.c",
1025    "src/f32-raddextexp/gen/avx2-p5-x80-acc2.c",
1026    "src/f32-raddextexp/gen/avx2-p5-x80-acc5.c",
1027    "src/f32-raddextexp/gen/avx2-p5-x96.c",
1028    "src/f32-raddextexp/gen/avx2-p5-x96-acc2.c",
1029    "src/f32-raddextexp/gen/avx2-p5-x96-acc3.c",
1030    "src/f32-raddextexp/gen/avx2-p5-x96-acc6.c",
1031    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64.c",
1032    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64-acc2.c",
1033    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64-acc4.c",
1034    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x72.c",
1035    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x72-acc3.c",
1036    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80.c",
1037    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80-acc2.c",
1038    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80-acc5.c",
1039    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96.c",
1040    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc2.c",
1041    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc3.c",
1042    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc6.c",
1043    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x8.c",
1044    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x16.c",
1045    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x24.c",
1046    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x32.c",
1047    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x40.c",
1048    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x48.c",
1049    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x56.c",
1050    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x64.c",
1051    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x72.c",
1052    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x80.c",
1053    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x8.c",
1054    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x16.c",
1055    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x24.c",
1056    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x32.c",
1057    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x40.c",
1058    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x48.c",
1059    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x56.c",
1060    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x64.c",
1061    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x72.c",
1062    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x80.c",
1063    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x8.c",
1064    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x16.c",
1065    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x24.c",
1066    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x32.c",
1067    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x40.c",
1068    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x48.c",
1069    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x56.c",
1070    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x64.c",
1071    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x72.c",
1072    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x80.c",
1073    "src/f32-vscaleexpminusmax/gen/avx2-p5-x8.c",
1074    "src/f32-vscaleexpminusmax/gen/avx2-p5-x16.c",
1075    "src/f32-vscaleexpminusmax/gen/avx2-p5-x24.c",
1076    "src/f32-vscaleexpminusmax/gen/avx2-p5-x32.c",
1077    "src/f32-vscaleexpminusmax/gen/avx2-p5-x40.c",
1078    "src/f32-vscaleexpminusmax/gen/avx2-p5-x48.c",
1079    "src/f32-vscaleexpminusmax/gen/avx2-p5-x56.c",
1080    "src/f32-vscaleexpminusmax/gen/avx2-p5-x64.c",
1081    "src/f32-vscaleexpminusmax/gen/avx2-p5-x72.c",
1082    "src/f32-vscaleexpminusmax/gen/avx2-p5-x80.c",
1083    "src/f32-vscaleexpminusmax/gen/avx2-p5-x88.c",
1084    "src/f32-vscaleexpminusmax/gen/avx2-p5-x96.c",
1085    "src/f32-vscaleextexp/gen/avx2-p5-x8.c",
1086    "src/f32-vscaleextexp/gen/avx2-p5-x16.c",
1087    "src/f32-vscaleextexp/gen/avx2-p5-x24.c",
1088    "src/f32-vscaleextexp/gen/avx2-p5-x32.c",
1089    "src/f32-vscaleextexp/gen/avx2-p5-x40.c",
1090    "src/f32-vscaleextexp/gen/avx2-p5-x48.c",
1091    "src/f32-vscaleextexp/gen/avx2-p5-x56.c",
1092    "src/f32-vscaleextexp/gen/avx2-p5-x64.c",
1093    "src/f32-vscaleextexp/gen/avx2-p5-x72.c",
1094    "src/f32-vscaleextexp/gen/avx2-p5-x80.c",
1095    "src/f32-vscaleextexp/gen/avx2-p5-x88.c",
1096    "src/f32-vscaleextexp/gen/avx2-p5-x96.c",
1097    "src/math/exp-avx2-p5.c",
1098    "src/math/exp-avx2-perm-p3.c",
1099    "src/math/exp-avx2-perm-p4.c",
1100    "src/math/expminus-avx2-p5.c",
1101    "src/math/extexp-avx2-p5.c",
1102    "src/math/sigmoid-avx2-rr2-p5-div.c",
1103    "src/math/sigmoid-avx2-rr1-p5-div.c",
1104    "src/math/sigmoid-avx2-rr2-p5-nr2fma.c",
1105    "src/math/sigmoid-avx2-rr1-p5-nr2fma.c",
1106    "src/math/sigmoid-avx2-rr2-p5-nr1fma.c",
1107    "src/math/sigmoid-avx2-rr1-p5-nr1fma.c",
1108]
1109
// Micro-kernel sources grouped under the x86 AVX-512F label. All entries
// are f32 kernels: clamp, dwconv, gemm / gemm-inc and igemm ("broadcast"
// variants), hswish, the "p5-scalef" exponential family
// (raddexpminusmax, raddextexp, raddstoreexpminusmax, vscaleexpminusmax,
// vscaleextexp), rmax, vbinary (x16 / x32), vscale, and the AVX-512F
// files under src/math (exp, extexp).
// NOTE(review): the module that compiles this list is outside this view.
1110AVX512F_UKERNELS = [
1111    "src/f32-clamp/avx512f.c",
1112    "src/f32-dwconv/gen/up32x4-avx512f-acc2.c",
1113    "src/f32-dwconv/gen/up32x4-avx512f.c",
1114    "src/f32-dwconv/gen/up16x4-avx512f-acc2.c",
1115    "src/f32-dwconv/gen/up16x4-avx512f.c",
1116    "src/f32-dwconv/gen/up32x9-avx512f-acc2.c",
1117    "src/f32-dwconv/gen/up32x9-avx512f.c",
1118    "src/f32-dwconv/gen/up16x9-avx512f-acc2.c",
1119    "src/f32-dwconv/gen/up16x9-avx512f.c",
1120    "src/f32-dwconv/gen/up32x25-avx512f-acc2.c",
1121    "src/f32-dwconv/gen/up32x25-avx512f.c",
1122    "src/f32-dwconv/gen/up16x25-avx512f-acc2.c",
1123    "src/f32-dwconv/gen/up16x25-avx512f.c",
1124    "src/f32-gemm/gen/1x16-avx512f-broadcast.c",
1125    "src/f32-gemm/gen/4x16-avx512f-broadcast.c",
1126    "src/f32-gemm/gen/5x16-avx512f-broadcast.c",
1127    "src/f32-gemm/gen/6x16-avx512f-broadcast.c",
1128    "src/f32-gemm/gen/7x16-avx512f-broadcast.c",
1129    "src/f32-gemm/gen/8x16-avx512f-broadcast.c",
1130    "src/f32-gemm/gen-inc/1x16-avx512f-broadcast.c",
1131    "src/f32-gemm/gen-inc/4x16-avx512f-broadcast.c",
1132    "src/f32-gemm/gen-inc/5x16-avx512f-broadcast.c",
1133    "src/f32-gemm/gen-inc/6x16-avx512f-broadcast.c",
1134    "src/f32-gemm/gen-inc/7x16-avx512f-broadcast.c",
1135    "src/f32-gemm/gen-inc/8x16-avx512f-broadcast.c",
1136    "src/f32-hswish/gen/avx512f-x16.c",
1137    "src/f32-hswish/gen/avx512f-x32.c",
1138    "src/f32-igemm/gen/1x16-avx512f-broadcast.c",
1139    "src/f32-igemm/gen/4x16-avx512f-broadcast.c",
1140    "src/f32-igemm/gen/5x16-avx512f-broadcast.c",
1141    "src/f32-igemm/gen/6x16-avx512f-broadcast.c",
1142    "src/f32-igemm/gen/7x16-avx512f-broadcast.c",
1143    "src/f32-igemm/gen/8x16-avx512f-broadcast.c",
1144    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128.c",
1145    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128-acc2.c",
1146    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128-acc4.c",
1147    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x144.c",
1148    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x144-acc3.c",
1149    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160.c",
1150    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160-acc2.c",
1151    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160-acc5.c",
1152    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192.c",
1153    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc2.c",
1154    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc3.c",
1155    "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc6.c",
1156    "src/f32-raddextexp/gen/avx512f-p5-scalef-x128.c",
1157    "src/f32-raddextexp/gen/avx512f-p5-scalef-x128-acc2.c",
1158    "src/f32-raddextexp/gen/avx512f-p5-scalef-x128-acc4.c",
1159    "src/f32-raddextexp/gen/avx512f-p5-scalef-x144.c",
1160    "src/f32-raddextexp/gen/avx512f-p5-scalef-x144-acc3.c",
1161    "src/f32-raddextexp/gen/avx512f-p5-scalef-x160.c",
1162    "src/f32-raddextexp/gen/avx512f-p5-scalef-x160-acc2.c",
1163    "src/f32-raddextexp/gen/avx512f-p5-scalef-x160-acc5.c",
1164    "src/f32-raddextexp/gen/avx512f-p5-scalef-x192.c",
1165    "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc2.c",
1166    "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc3.c",
1167    "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc6.c",
1168    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128.c",
1169    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128-acc2.c",
1170    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128-acc4.c",
1171    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x144.c",
1172    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x144-acc3.c",
1173    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160.c",
1174    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160-acc2.c",
1175    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160-acc5.c",
1176    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192.c",
1177    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc2.c",
1178    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc3.c",
1179    "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc6.c",
1180    "src/f32-rmax/avx512f.c",
1181    "src/f32-vbinary/gen/vadd-avx512f-x16.c",
1182    "src/f32-vbinary/gen/vadd-avx512f-x32.c",
1183    "src/f32-vbinary/gen/vaddc-avx512f-x16.c",
1184    "src/f32-vbinary/gen/vaddc-avx512f-x32.c",
1185    "src/f32-vbinary/gen/vdiv-avx512f-x16.c",
1186    "src/f32-vbinary/gen/vdiv-avx512f-x32.c",
1187    "src/f32-vbinary/gen/vdivc-avx512f-x16.c",
1188    "src/f32-vbinary/gen/vdivc-avx512f-x32.c",
1189    "src/f32-vbinary/gen/vmax-avx512f-x16.c",
1190    "src/f32-vbinary/gen/vmax-avx512f-x32.c",
1191    "src/f32-vbinary/gen/vmaxc-avx512f-x16.c",
1192    "src/f32-vbinary/gen/vmaxc-avx512f-x32.c",
1193    "src/f32-vbinary/gen/vmin-avx512f-x16.c",
1194    "src/f32-vbinary/gen/vmin-avx512f-x32.c",
1195    "src/f32-vbinary/gen/vminc-avx512f-x16.c",
1196    "src/f32-vbinary/gen/vminc-avx512f-x32.c",
1197    "src/f32-vbinary/gen/vmul-avx512f-x16.c",
1198    "src/f32-vbinary/gen/vmul-avx512f-x32.c",
1199    "src/f32-vbinary/gen/vmulc-avx512f-x16.c",
1200    "src/f32-vbinary/gen/vmulc-avx512f-x32.c",
1201    "src/f32-vbinary/gen/vrdivc-avx512f-x16.c",
1202    "src/f32-vbinary/gen/vrdivc-avx512f-x32.c",
1203    "src/f32-vbinary/gen/vrsubc-avx512f-x16.c",
1204    "src/f32-vbinary/gen/vrsubc-avx512f-x32.c",
1205    "src/f32-vbinary/gen/vsub-avx512f-x16.c",
1206    "src/f32-vbinary/gen/vsub-avx512f-x32.c",
1207    "src/f32-vbinary/gen/vsubc-avx512f-x16.c",
1208    "src/f32-vbinary/gen/vsubc-avx512f-x32.c",
1209    "src/f32-vscale/avx512f-unroll64.c",
1210    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x16.c",
1211    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x32.c",
1212    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x48.c",
1213    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x64.c",
1214    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x80.c",
1215    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x96.c",
1216    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x112.c",
1217    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x128.c",
1218    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x144.c",
1219    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x160.c",
1220    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x176.c",
1221    "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x192.c",
1222    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x16.c",
1223    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x32.c",
1224    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x48.c",
1225    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x64.c",
1226    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x80.c",
1227    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x96.c",
1228    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x112.c",
1229    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x128.c",
1230    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x144.c",
1231    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x160.c",
1232    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x176.c",
1233    "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x192.c",
1234    "src/math/exp-avx512f-p5-scalef.c",
1235    "src/math/exp-avx512f-p5.c",
1236    "src/math/exp-avx512f-perm-p3.c",
1237    "src/math/exp-avx512f-perm2-p2.c",
1238    "src/math/extexp-avx512f-p5.c",
1239]
1240
// Hand-written assembly (.S) micro-kernels grouped under the 32-bit ARM
// label: one q8 depthwise-convolution kernel plus f32 GEMM and IGEMM 4x8
// NEON kernels (Cortex-A53, Cortex-A75 with and without "pld", and ld64).
// NOTE(review): the module that assembles this list is outside this view.
1241AARCH32_ASM_UKERNELS = [
1242    "src/q8-dwconv/up8x9-aarch32-neon.S",
1243    "src/f32-gemm/4x8-aarch32-neon-cortex-a53.S",
1244    "src/f32-gemm/gen/4x8-aarch32-neon-cortex-a75.S",
1245    "src/f32-gemm/gen/4x8-aarch32-neon-pld-cortex-a75.S",
1246    "src/f32-gemm/4x8-aarch32-neon-ld64.S",
1247    "src/f32-igemm/4x8-aarch32-neon-ld64.S",
1248    "src/f32-igemm/gen/4x8-aarch32-neon-cortex-a75.S",
1249    "src/f32-igemm/gen/4x8-aarch32-neon-pld-cortex-a75.S",
1250]
1251
// Hand-written assembly (.S) micro-kernels grouped under the 64-bit ARM
// label: f32 depthwise convolution (up4x9), f32 GEMM and GEMM-inc, and
// f32 IGEMM, each in several per-core tunings (Cortex-A53/A55/A57/A73/A75)
// plus generic ld64 / ld128 variants.
// Note that the IGEMM entries are split between src/f32-igemm/ and
// src/f32-igemm/gen/; keep each path exactly as listed.
// NOTE(review): the module that assembles this list is outside this view.
1252AARCH64_ASM_UKERNELS = [
1253    "src/f32-dwconv/up4x9-aarch64-neonfma-cortex-a55.S",
1254    "src/f32-dwconv/up4x9-aarch64-neonfma.S",
1255    "src/f32-gemm/gen/1x12-aarch64-neonfma-cortex-a53.S",
1256    "src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a53.S",
1257    "src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a57.S",
1258    "src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a75.S",
1259    "src/f32-gemm/gen/4x12-aarch64-neonfma-cortex-a53.S",
1260    "src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a53.S",
1261    "src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a57.S",
1262    "src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a75.S",
1263    "src/f32-gemm/gen/4x8-aarch64-neonfma-ld128.S",
1264    "src/f32-gemm/gen/4x8-aarch64-neonfma-ld64.S",
1265    "src/f32-gemm/gen/5x8-aarch64-neonfma-cortex-a57.S",
1266    "src/f32-gemm/gen/5x8-aarch64-neonfma-cortex-a75.S",
1267    "src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a53.S",
1268    "src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a73.S",
1269    "src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a57.S",
1270    "src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a75.S",
1271    "src/f32-gemm/gen/6x8-aarch64-neonfma-ld128.S",
1272    "src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S",
1273    "src/f32-gemm/gen-inc/1x12-aarch64-neonfma-cortex-a53.S",
1274    "src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a53.S",
1275    "src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a57.S",
1276    "src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a75.S",
1277    "src/f32-gemm/gen-inc/4x12-aarch64-neonfma-cortex-a53.S",
1278    "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a53.S",
1279    "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a57.S",
1280    "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a75.S",
1281    "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-ld128.S",
1282    "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-ld64.S",
1283    "src/f32-gemm/gen-inc/5x8-aarch64-neonfma-cortex-a57.S",
1284    "src/f32-gemm/gen-inc/5x8-aarch64-neonfma-cortex-a75.S",
1285    "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a53.S",
1286    "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a73.S",
1287    "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a57.S",
1288    "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a75.S",
1289    "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-ld128.S",
1290    "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-ld64.S",
1291    "src/f32-igemm/1x12-aarch64-neonfma-cortex-a53.S",
1292    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a53.S",
1293    "src/f32-igemm/gen/1x8-aarch64-neonfma-cortex-a57.S",
1294    "src/f32-igemm/gen/1x8-aarch64-neonfma-cortex-a75.S",
1295    "src/f32-igemm/4x12-aarch64-neonfma-cortex-a53.S",
1296    "src/f32-igemm/4x8-aarch64-neonfma-cortex-a53.S",
1297    "src/f32-igemm/gen/4x8-aarch64-neonfma-cortex-a57.S",
1298    "src/f32-igemm/gen/4x8-aarch64-neonfma-cortex-a75.S",
1299    "src/f32-igemm/gen/5x8-aarch64-neonfma-cortex-a57.S",
1300    "src/f32-igemm/gen/5x8-aarch64-neonfma-cortex-a75.S",
1301    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a53.S",
1302    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a73.S",
1303    "src/f32-igemm/gen/6x8-aarch64-neonfma-cortex-a57.S",
1304    "src/f32-igemm/gen/6x8-aarch64-neonfma-cortex-a75.S",
1305]
1306
// Shared build settings inherited by the xnnpack_* static libraries below
// through `defaults: ["xnnpack_internal_default"]`.
1307cc_defaults {
1308    name: "xnnpack_internal_default",
1309    vendor_available: true,
1310    sdk_version: "current",
    // Headers are resolved relative to this directory: the public API in
    // include/ and the internal headers in src/.
1311    local_include_dirs: [
1312        "include",
1313        "src",
1314    ],
1315    cflags: [
1316        "-std=c99",
        // NOTE(review): the meaning of log level 2 is defined by XNNPACK's
        // logging header, which is not visible here -- confirm before changing.
1317        "-DXNN_LOG_LEVEL=2",
        // The remaining flags silence specific compiler warnings.
1318        "-Wno-unused-parameter",
1319        "-Wno-missing-field-initializers",
1320        "-Wno-pointer-arith",
1321    ],
1322    stl: "libc++_static",
1323}
1324
// Static library built from TABLE_SRCS (the src/tables/exp2-k-over-*.c
// files declared near the top of this file). It is listed as a static
// dependency of xnnpack_scalar_ukernels.
1325cc_library_static {
1326    name: "xnnpack_tables",
1327    defaults: ["xnnpack_internal_default"],
1328    srcs: TABLE_SRCS,
1329}
1330
// Static library containing only src/im2col.c, built with the shared
// xnnpack_internal_default settings and no additional dependencies.
1331cc_library_static {
1332    name: "xnnpack_im2col",
1333    defaults: ["xnnpack_internal_default"],
1334    srcs: [
1335        "src/im2col.c",
1336    ],
1337}
1338
// Static library containing only src/indirection.c. It compiles against
// the fp16 and fxdiv header-only libraries and depends on libpthreadpool.
// It is pulled in whole by xnnpack_operators via whole_static_libs.
1339cc_library_static {
1340    name: "xnnpack_indirection",
1341    defaults: ["xnnpack_internal_default"],
1342    srcs: [
1343        "src/indirection.c",
1344    ],
1345    header_libs: [
1346        "fp16_headers",
1347        "fxdiv_headers",
1348    ],
1349    static_libs: [
1350        "libpthreadpool",
1351    ],
1352}
1353
// Static library containing only src/operator-run.c.
cc_library_static {
    name: "xnnpack_operator_run",
    defaults: ["xnnpack_internal_default"],
    srcs: ["src/operator-run.c"],
    // Extra flag on top of the defaults: silence the VLA warning.
    cflags: ["-Wno-vla"],
    static_libs: [
        "libclog",
        "libpthreadpool",
    ],
    header_libs: [
        "fp16_headers",
        "fxdiv_headers",
    ],
}
1372
// Static library built from OPERATOR_SRCS plus memory.c and operator-delete.c.
cc_library_static {
    name: "xnnpack_operators",
    defaults: ["xnnpack_internal_default"],
    // xnnpack_indirection is folded into this archive in its entirety.
    whole_static_libs: ["xnnpack_indirection"],
    static_libs: [
        "libclog",
        "libpthreadpool",
    ],
    header_libs: [
        "fp16_headers",
        "fxdiv_headers",
    ],
    srcs: OPERATOR_SRCS + [
        "src/memory.c",
        "src/operator-delete.c",
    ],
}
1392
// Static library compiled from the files listed in SCALAR_UKERNELS.
cc_library_static {
    name: "xnnpack_scalar_ukernels",
    defaults: ["xnnpack_internal_default"],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
    header_libs: [
        "fp16_headers",
        "fxdiv_headers",
    ],
    srcs: SCALAR_UKERNELS,
}
1406
// Static library compiled from PSIMD_FASTMATH_UKERNELS with -O3 and
// -ffast-math on every architecture.
cc_library_static {
    name: "xnnpack_psimd_fastmath_ukernels",
    defaults: ["xnnpack_internal_default"],
    srcs: PSIMD_FASTMATH_UKERNELS,
    header_libs: [
        "fp16_headers",
        "psimd_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
    cflags: [
        "-O3",
        "-ffast-math",
    ],
    // 32-bit ARM additionally gets the ARM-mode and NEON FPU flags.
    arch: {
        arm: {
            cflags: [
                "-marm",
                "-mfpu=neon",
            ],
        },
    },
}
1432
// Static library compiled from PSIMD_ACCMATH_UKERNELS with -O3 (no
// -ffast-math flag is passed here).
cc_library_static {
    name: "xnnpack_psimd_accmath_ukernels",
    defaults: ["xnnpack_internal_default"],
    srcs: PSIMD_ACCMATH_UKERNELS,
    header_libs: [
        "fp16_headers",
        "psimd_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
    cflags: ["-O3"],
    // 32-bit ARM additionally gets the ARM-mode and NEON FPU flags.
    arch: {
        arm: {
            cflags: [
                "-marm",
                "-mfpu=neon",
            ],
        },
    },
}
1457
// Static library compiled from NEON_UKERNELS on arm and arm64; the module is
// disabled on x86 and x86_64.
cc_library_static {
    name: "xnnpack_neon_ukernels",
    defaults: ["xnnpack_internal_default"],
    header_libs: ["fp16_headers"],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
    arch: {
        x86: {
            enabled: false,
        },
        x86_64: {
            enabled: false,
        },
        // Only the 32-bit ARM build passes explicit ARM-mode/FPU flags.
        arm: {
            cflags: [
                "-marm",
                "-mfpu=neon",
            ],
            srcs: NEON_UKERNELS,
        },
        arm64: {
            srcs: NEON_UKERNELS,
        },
    },
}
1483
// Static library compiled from NEONFMA_UKERNELS on arm, and from
// NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS on arm64; disabled on x86 and
// x86_64.
cc_library_static {
    name: "xnnpack_neonfma_ukernels",
    defaults: ["xnnpack_internal_default"],
    header_libs: ["fp16_headers"],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
    arch: {
        x86: {
            enabled: false,
        },
        x86_64: {
            enabled: false,
        },
        // Only the 32-bit ARM build passes explicit ARM-mode/FPU flags.
        arm: {
            cflags: [
                "-marm",
                "-mfpu=neon-vfpv4",
            ],
            srcs: NEONFMA_UKERNELS,
        },
        arm64: {
            srcs: NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,
        },
    },
}
1509
// Static library compiled from AARCH64_NEONFP16ARITH_UKERNELS. Enabled for
// arm64 only; arm, x86 and x86_64 are switched off.
cc_library_static {
    name: "xnnpack_neonfp16arith_ukernels",
    defaults: ["xnnpack_internal_default"],
    header_libs: ["fp16_headers"],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
    arch: {
        arm64: {
            cflags: ["-march=armv8.2-a+fp16"],
            srcs: AARCH64_NEONFP16ARITH_UKERNELS,
        },
        arm: {
            enabled: false,
        },
        x86: {
            enabled: false,
        },
        x86_64: {
            enabled: false,
        },
    },
}
1532
// Static library compiled from AARCH32_ASM_UKERNELS on arm and
// AARCH64_ASM_UKERNELS on arm64; disabled on x86 and x86_64.
cc_library_static {
    name: "xnnpack_asm_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        x86: {
            enabled: false,
        },
        x86_64: {
            enabled: false,
        },
        arm: {
            srcs: AARCH32_ASM_UKERNELS,
        },
        arm64: {
            srcs: AARCH64_ASM_UKERNELS,
        },
    },
}
1547
// Static library compiled from SSE_UKERNELS + SSE2_UKERNELS with -msse2.
// Enabled for x86 and x86_64 only.
cc_library_static {
    name: "xnnpack_sse2_ukernels",
    defaults: ["xnnpack_internal_default"],
    header_libs: ["fp16_headers"],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
    arch: {
        arm: {
            enabled: false,
        },
        arm64: {
            enabled: false,
        },
        // Both x86 variants use the same sources and flags.
        x86: {
            cflags: ["-msse2"],
            srcs: SSE_UKERNELS + SSE2_UKERNELS,
        },
        x86_64: {
            cflags: ["-msse2"],
            srcs: SSE_UKERNELS + SSE2_UKERNELS,
        },
    },
}
1575
// Static library compiled from SSE41_UKERNELS with -msse4.1.
// Enabled for x86 and x86_64 only.
cc_library_static {
    name: "xnnpack_sse41_ukernels",
    defaults: ["xnnpack_internal_default"],
    header_libs: ["fp16_headers"],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
    arch: {
        arm: {
            enabled: false,
        },
        arm64: {
            enabled: false,
        },
        // Both x86 variants use the same sources and flags.
        x86: {
            cflags: ["-msse4.1"],
            srcs: SSE41_UKERNELS,
        },
        x86_64: {
            cflags: ["-msse4.1"],
            srcs: SSE41_UKERNELS,
        },
    },
}
1603
// Static library compiled from AVX_UKERNELS with -mavx.
// Enabled for x86 and x86_64 only.
cc_library_static {
    name: "xnnpack_avx_ukernels",
    defaults: ["xnnpack_internal_default"],
    header_libs: ["fp16_headers"],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
    arch: {
        arm: {
            enabled: false,
        },
        arm64: {
            enabled: false,
        },
        // Both x86 variants use the same sources and flags.
        x86: {
            cflags: ["-mavx"],
            srcs: AVX_UKERNELS,
        },
        x86_64: {
            cflags: ["-mavx"],
            srcs: AVX_UKERNELS,
        },
    },
}
1631
// Static library compiled from FMA3_UKERNELS with -mfma.
// Enabled for x86 and x86_64 only.
cc_library_static {
    name: "xnnpack_fma3_ukernels",
    defaults: ["xnnpack_internal_default"],
    header_libs: ["fp16_headers"],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
    arch: {
        arm: {
            enabled: false,
        },
        arm64: {
            enabled: false,
        },
        // Both x86 variants use the same sources and flags.
        x86: {
            cflags: ["-mfma"],
            srcs: FMA3_UKERNELS,
        },
        x86_64: {
            cflags: ["-mfma"],
            srcs: FMA3_UKERNELS,
        },
    },
}
1659
// Static library compiled from AVX2_UKERNELS with -mfma and -mavx2.
// Enabled for x86 and x86_64 only.
cc_library_static {
    name: "xnnpack_avx2_ukernels",
    defaults: ["xnnpack_internal_default"],
    header_libs: ["fp16_headers"],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
    arch: {
        arm: {
            enabled: false,
        },
        arm64: {
            enabled: false,
        },
        // Both x86 variants use the same sources and flags.
        x86: {
            cflags: [
                "-mfma",
                "-mavx2",
            ],
            srcs: AVX2_UKERNELS,
        },
        x86_64: {
            cflags: [
                "-mfma",
                "-mavx2",
            ],
            srcs: AVX2_UKERNELS,
        },
    },
}
1689
// Static library compiled from AVX512F_UKERNELS with -mavx512f.
// Enabled for x86 and x86_64 only.
cc_library_static {
    name: "xnnpack_avx512f_ukernels",
    defaults: ["xnnpack_internal_default"],
    header_libs: ["fp16_headers"],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
    arch: {
        arm: {
            enabled: false,
        },
        arm64: {
            enabled: false,
        },
        // Both x86 variants use the same sources and flags.
        x86: {
            cflags: ["-mavx512f"],
            srcs: AVX512F_UKERNELS,
        },
        x86_64: {
            cflags: ["-mavx512f"],
            srcs: AVX512F_UKERNELS,
        },
    },
}
1717
// Umbrella archive with no sources of its own: it bundles, via
// whole_static_libs, the scalar kernels and tables for every architecture plus
// the architecture-specific micro-kernel libraries listed under `arch`.
cc_library_static {
    name: "xnnpack_ukernels",
    defaults: ["xnnpack_internal_default"],
    // Bundled on every architecture.
    whole_static_libs: [
        "xnnpack_scalar_ukernels",
        "xnnpack_tables",
    ],
    arch: {
        // x86 and x86_64 bundle the same set of libraries.
        x86: {
            whole_static_libs: [
                "xnnpack_psimd_fastmath_ukernels",
                "xnnpack_psimd_accmath_ukernels",
                "xnnpack_sse2_ukernels",
                "xnnpack_sse41_ukernels",
                "xnnpack_avx_ukernels",
                "xnnpack_fma3_ukernels",
                "xnnpack_avx2_ukernels",
                "xnnpack_avx512f_ukernels",
            ],
        },
        x86_64: {
            whole_static_libs: [
                "xnnpack_psimd_fastmath_ukernels",
                "xnnpack_psimd_accmath_ukernels",
                "xnnpack_sse2_ukernels",
                "xnnpack_sse41_ukernels",
                "xnnpack_avx_ukernels",
                "xnnpack_fma3_ukernels",
                "xnnpack_avx2_ukernels",
                "xnnpack_avx512f_ukernels",
            ],
        },
        arm: {
            whole_static_libs: [
                "xnnpack_psimd_fastmath_ukernels",
                "xnnpack_psimd_accmath_ukernels",
                "xnnpack_neon_ukernels",
                "xnnpack_neonfma_ukernels",
                "xnnpack_asm_ukernels",
            ],
        },
        // Same as arm, with xnnpack_neonfp16arith_ukernels added.
        arm64: {
            whole_static_libs: [
                "xnnpack_psimd_fastmath_ukernels",
                "xnnpack_psimd_accmath_ukernels",
                "xnnpack_neon_ukernels",
                "xnnpack_neonfma_ukernels",
                "xnnpack_neonfp16arith_ukernels",
                "xnnpack_asm_ukernels",
            ],
        },
    },
}
1771
// The libXNNPACK static library: four sources of its own plus the listed
// libraries bundled in whole. The include/ directory is exported to modules
// that depend on it.
cc_library_static {
    name: "libXNNPACK",
    defaults: ["xnnpack_internal_default"],
    srcs: [
        "src/init.c",
        "src/runtime.c",
        "src/subgraph.c",
        "src/tensor.c",
    ],
    export_include_dirs: ["include"],
    whole_static_libs: [
        "libclog",
        "libcpuinfo",
        "libpthreadpool",
        "xnnpack_ukernels",
        "xnnpack_operator_run",
        "xnnpack_operators",
    ],
}
1791
1792// Tests and benchmarks
// Common settings pulled in through `defaults` by the model libraries,
// benchmark and test modules in this file.
cc_defaults {
    name: "xnnpack_tests_default",
    stl: "libc++_static",
    vendor_available: true,
    cflags: ["-Wno-unused-function"],
    // Module-relative directories added to the include search path.
    local_include_dirs: [
        "bench",
        "models",
        "test",
        "src",
    ],
    header_libs: ["fp16_headers"],
    shared_libs: ["liblog"],
    static_libs: [
        "libXNNPACK",
        "libpthreadpool",
        "libgmock",
    ],
}
1818
// Static library containing only models/mobilenet-v1.cc.
cc_library_static {
    name: "xnnpack_mobilenet_v1",
    srcs: ["models/mobilenet-v1.cc"],
    defaults: ["xnnpack_tests_default"],
}
1826
// Static library containing only models/mobilenet-v2.cc.
cc_library_static {
    name: "xnnpack_mobilenet_v2",
    srcs: ["models/mobilenet-v2.cc"],
    defaults: ["xnnpack_tests_default"],
}
1834
// Static library containing only models/mobilenet-v3-large.cc.
cc_library_static {
    name: "xnnpack_mobilenet_v3_large",
    srcs: ["models/mobilenet-v3-large.cc"],
    defaults: ["xnnpack_tests_default"],
}
1842
// Static library containing only models/mobilenet-v3-small.cc.
cc_library_static {
    name: "xnnpack_mobilenet_v3_small",
    srcs: ["models/mobilenet-v3-small.cc"],
    defaults: ["xnnpack_tests_default"],
}
1850
// Benchmark binary built from bench/end2end.cc and bench/utils.cc; it links
// the four mobilenet model libraries along with libcpuinfo and
// libgoogle-benchmark.
cc_benchmark {
    name: "xnnpack_end2end_bench",
    defaults: ["xnnpack_tests_default"],
    cflags: ["-Wno-unused-result"],
    static_libs: [
        "libcpuinfo",
        "libgoogle-benchmark",
        "xnnpack_mobilenet_v1",
        "xnnpack_mobilenet_v2",
        "xnnpack_mobilenet_v3_large",
        "xnnpack_mobilenet_v3_small",
    ],
    srcs: [
        "bench/end2end.cc",
        "bench/utils.cc",
    ],
}
1870
// Test binary built from test/add-nc.cc; listed in the "general-tests" suite.
cc_test {
    name: "xnnpack_add_nc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/add-nc.cc"],
}
1881
// Test binary built from test/add-nd.cc; listed in the "general-tests" suite.
cc_test {
    name: "xnnpack_add_nd_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/add-nd.cc"],
}
1892
// Test binary built from test/argmax-pooling-nhwc.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_argmax_pooling_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/argmax-pooling-nhwc.cc"],
}
1903
// Test binary built from test/average-pooling-nhwc.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_average_pooling_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/average-pooling-nhwc.cc"],
}
1914
// Test binary built from test/channel-pad-nc.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_channel_pad_nc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/channel-pad-nc.cc"],
}
1925
// Test binary built from test/channel-shuffle-nc.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_channel_shuffle_nc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/channel-shuffle-nc.cc"],
}
1936
// Test binary built from test/clamp-nc.cc; listed in the "general-tests" suite.
cc_test {
    name: "xnnpack_clamp_nc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/clamp-nc.cc"],
}
1947
// Test binary built from test/convolution-nhwc.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_convolution_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/convolution-nhwc.cc"],
}
1958
// Test binary built from test/convolution-nchw.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_convolution_nchw_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/convolution-nchw.cc"],
}
1969
// Test binary built from test/deconvolution-nhwc.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_deconvolution_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/deconvolution-nhwc.cc"],
}
1980
// Test binary built from test/divide-nd.cc; listed in the "general-tests"
// suite.
cc_test {
    name: "xnnpack_divide_nd_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/divide-nd.cc"],
}
1991
// Test binary built from test/fully-connected-nc.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_fully_connected_nc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/fully-connected-nc.cc"],
}
2002
// Test binary built from test/global-average-pooling-nwc.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_global_average_pooling_nwc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/global-average-pooling-nwc.cc"],
}
2013
// Test binary built from test/global-average-pooling-ncw.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_global_average_pooling_ncw_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/global-average-pooling-ncw.cc"],
}
2024
// Test binary built from test/hardswish-nc.cc; listed in the "general-tests"
// suite.
cc_test {
    name: "xnnpack_hardswish_nc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/hardswish-nc.cc"],
}
2035
// Test binary built from test/leaky-relu-nc.cc; listed in the "general-tests"
// suite.
cc_test {
    name: "xnnpack_leaky_relu_nc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/leaky-relu-nc.cc"],
}
2046
// Test binary built from test/max-pooling-nhwc.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_max_pooling_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/max-pooling-nhwc.cc"],
}
2057
// Test binary built from test/maximum-nd.cc; listed in the "general-tests"
// suite.
cc_test {
    name: "xnnpack_maximum_nd_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/maximum-nd.cc"],
}
2068
// Test binary built from test/minimum-nd.cc; listed in the "general-tests"
// suite.
cc_test {
    name: "xnnpack_minimum_nd_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/minimum-nd.cc"],
}
2079
// Test binary built from test/multiply-nd.cc; listed in the "general-tests"
// suite.
cc_test {
    name: "xnnpack_multiply_nd_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/multiply-nd.cc"],
}
2090
// Test binary built from test/prelu-nc.cc; listed in the "general-tests" suite.
cc_test {
    name: "xnnpack_prelu_nc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/prelu-nc.cc"],
}
2101
// Test binary built from test/resize-bilinear-nhwc.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_resize_bilinear_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/resize-bilinear-nhwc.cc"],
}
2112
// Test binary built from test/sigmoid-nc.cc; listed in the "general-tests"
// suite.
cc_test {
    name: "xnnpack_sigmoid_nc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/sigmoid-nc.cc"],
}
2123
// Test binary built from test/softmax-nc.cc; listed in the "general-tests"
// suite.
cc_test {
    name: "xnnpack_softmax_nc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/softmax-nc.cc"],
}
2134
// Test binary built from test/subtract-nd.cc; listed in the "general-tests"
// suite.
cc_test {
    name: "xnnpack_subtract_nd_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/subtract-nd.cc"],
}
2145
// Test binary built from test/unpooling-nhwc.cc; listed in the
// "general-tests" suite.
cc_test {
    name: "xnnpack_unpooling_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    test_suites: ["general-tests"],
    srcs: ["test/unpooling-nhwc.cc"],
}
2156