Home
last modified time | relevance | path

Searched refs:vk41 (Results 1 – 25 of 92) sorted by relevance

1234

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-scalar-3x1.c56 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
169 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
170 vo1p0 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
171 vo2p0 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
296 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
297 vo1p0 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
298 vo2p0 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
393 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
394 vo1p0 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
395 vo2p0 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1.c57 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
206 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
207 vo1p0 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
208 vo2p0 += vi8x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
330 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
331 vo1p0 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
332 vo2p0 += vi8x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
407 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
408 vo1p0 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
409 vo2p0 += vi8x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1-acc2.c57 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
206 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
207 vo1p1 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
208 vo2p1 += vi8x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
333 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
334 vo1p1 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
335 vo2p1 += vi8x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
413 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
414 vo1p1 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
415 vo2p1 += vi8x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
D5x5p2-minmax-scalar-3x1-acc2.c56 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
169 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
170 vo1p1 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
171 vo2p1 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
299 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
300 vo1p1 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
301 vo2p1 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
399 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
400 vo1p1 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
401 vo2p1 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
D5x5p2-minmax-scalar-2x1.c56 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
148 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
149 vo1p0 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
243 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
244 vo1p0 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
315 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
316 vo1p0 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
D5x5s2p2-minmax-scalar-2x1-acc3.c57 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
173 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
174 vo1p0 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
267 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
268 vo1p0 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
325 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
326 vo1p0 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
D5x5p2-minmax-scalar-2x1-acc3.c56 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
148 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
149 vo1p0 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
247 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
248 vo1p0 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
323 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
324 vo1p0 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
D5x5p2-minmax-scalar-2x1-acc2.c56 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
148 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
149 vo1p1 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
245 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
246 vo1p1 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
319 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
320 vo1p1 += vi5x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
D5x5s2p2-minmax-scalar-2x1.c57 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
173 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
174 vo1p0 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
263 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
264 vo1p0 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
317 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
318 vo1p0 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
D5x5s2p2-minmax-scalar-2x1-acc2.c57 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local
173 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
174 vo1p1 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
265 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
266 vo1p1 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
321 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
322 vo1p1 += vi6x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
D5x5p2-minmax-scalar-1x1-acc4.c56 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local
127 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
193 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
243 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-1x1-acc2.c57 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local
139 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
196 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
228 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
D5x5p2-minmax-scalar-1x1-acc3.c56 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local
127 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
192 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
241 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1.c56 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local
127 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
190 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
237 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
D5x5s2p2-minmax-scalar-1x1-acc4.c57 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local
139 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
198 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
232 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-1x1.c57 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local
139 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
195 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
226 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
D5x5p2-minmax-scalar-1x1-acc2.c56 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local
127 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
191 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
239 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
D5x5s2p2-minmax-scalar-1x1-acc3.c57 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local
139 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
197 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
230 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1-acc5.c56 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local
127 vo0p4 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
194 vo0p4 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
245 vo0p4 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
D5x5s2p2-minmax-scalar-1x1-acc5.c57 const float vk41 = weights[22]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local
139 vo0p4 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
199 vo0p4 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
234 vo0p4 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
D5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c67 const v128_t vk41 = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
217 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
218 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
219 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
220 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
221 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
471 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
472 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
473 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
474 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c67 const v128_t vk41 = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
217 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
218 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
219 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
220 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
221 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
471 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
472 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
473 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
474 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
[all …]
D5x5p2-minmax-sse-5x4.c59 const __m128 vk41 = _mm_load1_ps(weights + 22); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
223 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
224 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi5x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
225 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi6x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
226 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
227 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi8x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
476 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
477 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi5x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
478 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi6x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
479 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4-acc2.c67 const v128_t vk41 = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
197 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi4x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
198 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi5x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
199 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi6x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
200 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
418 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi4x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
419 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi5x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
420 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi6x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
421 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
633 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi4x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4-acc2.c67 const v128_t vk41 = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
197 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi4x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
198 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi5x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
199 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi6x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
200 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
418 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi4x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
419 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi5x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
420 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi6x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
421 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
633 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi4x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
[all …]

1234