Home
last modified time | relevance | path

Searched refs:vk30 (Results 1 – 25 of 92) sorted by relevance

1234

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-scalar-3x1.c50 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
142 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
143 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
144 vo2p0 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
269 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
270 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
271 vo2p0 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
374 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
375 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
376 vo2p0 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1.c51 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
177 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
178 vo1p0 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
179 vo2p0 += vi7x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
311 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
312 vo1p0 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
313 vo2p0 += vi7x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
388 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
389 vo1p0 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
390 vo2p0 += vi7x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1-acc2.c51 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
177 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
178 vo1p1 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
179 vo2p1 += vi7x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
314 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
315 vo1p1 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
316 vo2p1 += vi7x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
394 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
395 vo1p1 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
396 vo2p1 += vi7x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
D5x5p2-minmax-scalar-3x1-acc2.c50 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
142 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
143 vo1p1 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
144 vo2p1 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
272 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
273 vo1p1 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
274 vo2p1 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
380 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
381 vo1p1 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
382 vo2p1 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
D5x5p2-minmax-scalar-2x1.c50 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
128 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
129 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
223 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
224 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
302 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
303 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
D5x5s2p2-minmax-scalar-2x1-acc3.c51 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
152 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
153 vo1p0 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
254 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
255 vo1p0 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
312 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
313 vo1p0 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
D5x5p2-minmax-scalar-2x1-acc3.c50 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
128 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
129 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
227 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
228 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
310 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
311 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
D5x5p2-minmax-scalar-2x1-acc2.c50 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
128 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
129 vo1p1 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
225 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
226 vo1p1 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
306 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
307 vo1p1 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
D5x5s2p2-minmax-scalar-2x1.c51 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
152 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
153 vo1p0 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
250 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
251 vo1p0 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
304 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
305 vo1p0 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
D5x5s2p2-minmax-scalar-2x1-acc2.c51 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local
152 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
153 vo1p1 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
252 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
253 vo1p1 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
308 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
309 vo1p1 += vi5x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
D5x5p2-minmax-scalar-1x1-acc4.c50 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local
114 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
180 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
236 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-1x1-acc2.c51 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local
126 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
189 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
221 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
D5x5p2-minmax-scalar-1x1-acc3.c50 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local
114 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
179 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
234 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1.c50 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local
114 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
177 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
230 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
D5x5s2p2-minmax-scalar-1x1-acc4.c51 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local
126 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
191 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
225 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-1x1.c51 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local
126 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
188 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
219 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
D5x5p2-minmax-scalar-1x1-acc2.c50 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local
114 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
178 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
232 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
D5x5s2p2-minmax-scalar-1x1-acc3.c51 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local
126 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
190 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
223 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1-acc5.c50 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local
114 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
181 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
238 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
D5x5s2p2-minmax-scalar-1x1-acc5.c51 const float vk30 = weights[16]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local
126 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
192 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
227 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
D5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c61 const v128_t vk30 = wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
260 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
261 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
262 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
263 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
264 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
514 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
515 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
516 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
517 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c61 const v128_t vk30 = wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
260 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
261 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
262 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
263 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
264 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
514 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
515 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
516 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
517 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
[all …]
D5x5p2-minmax-sse-5x4.c53 const __m128 vk30 = _mm_load1_ps(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
282 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
283 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
284 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi5x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
285 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi6x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
286 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi7x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
535 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
536 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
537 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi5x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
538 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi6x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4-acc2.c61 const v128_t vk30 = wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
234 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
235 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
236 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi5x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
237 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi6x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
455 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
456 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
457 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi5x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
458 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi6x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
662 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4-acc2.c61 const v128_t vk30 = wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
234 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
235 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
236 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi5x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
237 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi6x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
455 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
456 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
457 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi5x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
458 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi6x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
662 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
[all …]

1234