Home
last modified time | relevance | path

Searched refs:vk40 (Results 1 – 25 of 92) sorted by relevance

1234

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-scalar-3x1.c55 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
145 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
146 vo1p0 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
147 vo2p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
272 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
273 vo1p0 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
274 vo2p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
377 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
378 vo1p0 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
379 vo2p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1.c56 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
180 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
181 vo1p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
182 vo2p0 += vi8x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
314 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
315 vo1p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
316 vo2p0 += vi8x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
391 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
392 vo1p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
393 vo2p0 += vi8x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1-acc2.c56 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
180 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
181 vo1p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
182 vo2p0 += vi8x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
317 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
318 vo1p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
319 vo2p0 += vi8x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
397 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
398 vo1p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
399 vo2p0 += vi8x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
D5x5p2-minmax-scalar-3x1-acc2.c55 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
145 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
146 vo1p0 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
147 vo2p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
275 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
276 vo1p0 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
277 vo2p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
383 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
384 vo1p0 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
385 vo2p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
D5x5p2-minmax-scalar-2x1.c55 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
130 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
131 vo1p0 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
225 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
226 vo1p0 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
304 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
305 vo1p0 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
D5x5s2p2-minmax-scalar-2x1-acc3.c56 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
154 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
155 vo1p1 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
256 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
257 vo1p1 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
314 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
315 vo1p1 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
D5x5p2-minmax-scalar-2x1-acc3.c55 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
130 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
131 vo1p1 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
229 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
230 vo1p1 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
312 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
313 vo1p1 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
D5x5p2-minmax-scalar-2x1-acc2.c55 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
130 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
131 vo1p0 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
227 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
228 vo1p0 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
308 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
309 vo1p0 += vi5x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
D5x5s2p2-minmax-scalar-2x1.c56 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
154 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
155 vo1p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
252 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
253 vo1p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
306 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
307 vo1p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
D5x5s2p2-minmax-scalar-2x1-acc2.c56 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local
154 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
155 vo1p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
254 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
255 vo1p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
310 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
311 vo1p0 += vi6x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
D5x5p2-minmax-scalar-1x1-acc4.c55 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local
115 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
181 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
237 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-1x1-acc2.c56 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local
127 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
190 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
222 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
D5x5p2-minmax-scalar-1x1-acc3.c55 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local
115 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
180 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
235 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1.c55 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local
115 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
178 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
231 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
D5x5s2p2-minmax-scalar-1x1-acc4.c56 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local
127 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
192 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
226 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-1x1.c56 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local
127 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
189 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
220 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
D5x5p2-minmax-scalar-1x1-acc2.c55 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local
115 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
179 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
233 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
D5x5s2p2-minmax-scalar-1x1-acc3.c56 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local
127 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
191 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
224 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1-acc5.c55 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local
115 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
182 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
239 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
D5x5s2p2-minmax-scalar-1x1-acc5.c56 const float vk40 = weights[21]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local
127 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
193 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
228 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
D5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c66 const v128_t vk40 = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
266 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
267 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
268 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
269 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
270 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
520 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
521 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
522 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
523 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c66 const v128_t vk40 = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
266 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
267 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
268 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
269 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
270 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
520 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
521 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
522 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
523 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
[all …]
D5x5p2-minmax-sse-5x4.c58 const __m128 vk40 = _mm_load1_ps(weights + 21); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
287 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
288 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi5x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
289 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi6x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
290 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
291 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi8x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
540 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
541 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi5x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
542 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi6x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
543 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4-acc2.c66 const v128_t vk40 = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
239 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
240 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
241 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
242 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
460 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
461 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
462 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
463 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
667 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4-acc2.c66 const v128_t vk40 = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
239 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
240 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
241 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
242 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
460 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
461 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
462 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
463 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
667 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
[all …]

1234