Home
last modified time | relevance | path

Searched refs:vi4x2 (Results 1 – 25 of 48) sorted by relevance

12

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-scalar-2x1.c102 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
155 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
165 vo1p0 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
166 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
173 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
250 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
260 vo1p0 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
261 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
268 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
325 vo1p0 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
[all …]
D5x5p2-minmax-scalar-3x1.c110 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
177 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
189 vo2p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
191 vo1p0 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
193 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
201 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
304 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
316 vo2p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
318 vo1p0 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
320 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
[all …]
D5x5p2-minmax-scalar-1x1-acc4.c94 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local
133 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
139 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
145 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
199 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
205 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
211 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
249 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
D5x5p2-minmax-scalar-1x1-acc3.c94 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local
133 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
139 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
145 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
198 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
204 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
210 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
247 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1.c94 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local
133 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
139 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
145 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
196 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
202 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
208 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
243 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
D5x5p2-minmax-scalar-1x1-acc2.c94 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local
133 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
139 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
145 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
197 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
203 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
209 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
245 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
D5x5p2-minmax-scalar-3x1-acc2.c110 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
177 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
189 vo2p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
191 vo1p1 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
193 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
201 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
307 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
319 vo2p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
321 vo1p1 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
323 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
[all …]
D5x5p2-minmax-scalar-2x1-acc3.c102 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
155 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
165 vo1p1 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
166 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
173 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
254 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
264 vo1p1 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
265 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
272 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
333 vo1p1 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
[all …]
D5x5p2-minmax-scalar-2x1-acc2.c102 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
155 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
165 vo1p1 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
166 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
173 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
252 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
262 vo1p1 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
263 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
270 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
329 vo1p1 += vi4x2 * vk32; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
[all …]
D5x5p2-minmax-scalar-1x1-acc5.c94 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local
133 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
139 vo0p4 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
145 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
200 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
206 vo0p4 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
212 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
251 vo0p4 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
D5x5s2p2-minmax-scalar-3x1-acc2.c131 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
188 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
222 vo2p0 += vi4x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
227 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
232 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
240 vi4x2 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
339 vo2p0 += vi4x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
344 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
349 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
419 vo2p0 += vi4x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
[all …]
D5x5s2p2-minmax-scalar-3x1.c131 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
188 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
222 vo2p0 += vi4x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
227 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
232 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
240 vi4x2 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
336 vo2p0 += vi4x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
341 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
346 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
413 vo2p0 += vi4x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
[all …]
D5x5s2p2-minmax-scalar-2x1-acc3.c117 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
161 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
189 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
192 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
199 vi4x2 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
275 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
278 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
333 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
336 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
D5x5s2p2-minmax-scalar-2x1-acc2.c117 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local
161 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
189 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
192 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
199 vi4x2 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
273 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
276 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
329 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
332 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
D5x5s2p2-minmax-scalar-2x1.c117 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
161 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
189 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
192 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
199 vi4x2 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
271 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
274 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
325 vo1p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
328 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
D5x5s2p2-minmax-scalar-1x1-acc4.c102 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local
133 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
151 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
157 vi4x2 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
204 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
238 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-1x1.c102 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local
133 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
151 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
157 vi4x2 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
201 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
232 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
D5x5s2p2-minmax-scalar-1x1-acc2.c102 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local
133 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
151 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
157 vi4x2 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
202 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
234 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
D5x5s2p2-minmax-scalar-1x1-acc3.c102 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local
133 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
151 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
157 vi4x2 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
203 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
236 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
D5x5s2p2-minmax-scalar-1x1-acc5.c102 float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local
133 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
151 vo0p4 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
157 vi4x2 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
205 vo0p4 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
240 vo0p4 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
D3x3p1-minmax-scalar-5x1.c103 const float vi4x2 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
151 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
159 vo4p0 += vi4x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
163 vo3p0 += vi4x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
167 vo2p0 += vi4x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
/external/XNNPACK/src/f32-conv-hwc2chw/
D3x3s2p1c3x4-wasmsimd-2x2.c145 const v128_t vi4x2 = wasm_v128_load(i4); i4 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
164 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk20c1, wasm_v32x4_shuffle(vi4x2, vi4x2, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
185 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk20c2, wasm_v32x4_shuffle(vi4x2, vi4x2, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
206 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk21c0, wasm_v32x4_shuffle(vi4x2, vi4x2, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
227 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk21c1, wasm_v32x4_shuffle(vi4x2, vi4x2, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
295 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk22c1, wasm_v32x4_shuffle(vi4x2, vi4x2, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
316 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk22c2, wasm_v32x4_shuffle(vi4x2, vi4x2, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
398 v128_t vi4x2 = vzero; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
405 vi4x2 = wasm_v128_load(i4 + 4); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
425 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk20c1, wasm_v32x4_shuffle(vi4x2, vi4x2, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
[all …]
D3x3s2p1c3x4-sse-2x2.c144 const __m128 vi4x2 = _mm_loadu_ps(i4); i4 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
163 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk20c1, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
184 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk20c2, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
205 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk21c0, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
226 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk21c1, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
294 …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk22c1, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
315 …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk22c2, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
398 __m128 vi4x2 = _mm_setzero_ps(); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
405 vi4x2 = _mm_loadu_ps(i4 + 4); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
425 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk20c1, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
[all …]
D3x3s2p1c3x4-neonfma-2x2.c144 const float32x4_t vi4x2 = vld1q_f32(i4); i4 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
163 vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
184 vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c2, vi4x2, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
205 vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c0, vi4x2, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
226 vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c1, vi4x2, 3); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
294 vo1x0 = vfmaq_laneq_f32(vo1x0, vk22c1, vi4x2, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
315 vo1x0 = vfmaq_laneq_f32(vo1x0, vk22c2, vi4x2, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
398 float32x4_t vi4x2 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
405 vi4x2 = vld1q_f32(i4 + 4); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
425 vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
[all …]
D3x3s2p1c3x4-neon-2x2.c144 const float32x4_t vi4x2 = vld1q_f32(i4); i4 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local
163 vo1x1 = vmlaq_lane_f32(vo1x1, vk20c1, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
184 vo1x1 = vmlaq_lane_f32(vo1x1, vk20c2, vget_low_f32(vi4x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
205 vo1x1 = vmlaq_lane_f32(vo1x1, vk21c0, vget_high_f32(vi4x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
226 vo1x1 = vmlaq_lane_f32(vo1x1, vk21c1, vget_high_f32(vi4x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
294 vo1x0 = vmlaq_lane_f32(vo1x0, vk22c1, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
315 vo1x0 = vmlaq_lane_f32(vo1x0, vk22c2, vget_low_f32(vi4x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
396 float32x4_t vi4x2 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local
403 vi4x2 = vld1q_f32(i4 + 4); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
423 vo1x1 = vmlaq_lane_f32(vo1x1, vk20c1, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
[all …]

12