Home
last modified time | relevance | path

Searched refs:vk42 (Results 1 – 25 of 92) sorted by relevance

1234

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-scalar-3x1.c57 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
193 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
194 vo1p0 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
195 vo2p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
320 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
321 vo1p0 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
322 vo2p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
409 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
410 vo1p0 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
411 vo2p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1.c58 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
232 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
233 vo1p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
234 vo2p0 += vi8x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
346 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
347 vo1p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
348 vo2p0 += vi8x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
423 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
424 vo1p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
425 vo2p0 += vi8x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1-acc2.c58 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
232 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
233 vo1p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
234 vo2p0 += vi8x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
349 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
350 vo1p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
351 vo2p0 += vi8x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
429 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
430 vo1p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
431 vo2p0 += vi8x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
D5x5p2-minmax-scalar-3x1-acc2.c57 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
193 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
194 vo1p0 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
195 vo2p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
323 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
324 vo1p0 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
325 vo2p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
415 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
416 vo1p0 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
417 vo2p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
D5x5p2-minmax-scalar-2x1.c57 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
166 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
167 vo1p0 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
261 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
262 vo1p0 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
326 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
327 vo1p0 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
D5x5s2p2-minmax-scalar-2x1-acc3.c58 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
192 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
193 vo1p2 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
278 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
279 vo1p2 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
336 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
337 vo1p2 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
D5x5p2-minmax-scalar-2x1-acc3.c57 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
166 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
167 vo1p2 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
265 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
266 vo1p2 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
334 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
335 vo1p2 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
D5x5p2-minmax-scalar-2x1-acc2.c57 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
166 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
167 vo1p0 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
263 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
264 vo1p0 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
330 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
331 vo1p0 += vi5x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
D5x5s2p2-minmax-scalar-2x1.c58 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
192 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
193 vo1p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
274 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
275 vo1p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
328 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
329 vo1p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
D5x5s2p2-minmax-scalar-2x1-acc2.c58 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local
192 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
193 vo1p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
276 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
277 vo1p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
332 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
333 vo1p0 += vi6x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
D5x5p2-minmax-scalar-1x1-acc4.c57 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local
139 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
205 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
249 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-1x1-acc2.c58 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local
151 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
202 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
234 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
D5x5p2-minmax-scalar-1x1-acc3.c57 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local
139 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
204 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
247 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1.c57 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local
139 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
202 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
243 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
D5x5s2p2-minmax-scalar-1x1-acc4.c58 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local
151 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
204 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
238 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-1x1.c58 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local
151 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
201 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
232 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
D5x5p2-minmax-scalar-1x1-acc2.c57 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local
139 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
203 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
245 vo0p0 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
D5x5s2p2-minmax-scalar-1x1-acc3.c58 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local
151 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
203 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
236 vo0p2 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1-acc5.c57 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local
139 vo0p4 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
206 vo0p4 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
251 vo0p4 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
D5x5s2p2-minmax-scalar-1x1-acc5.c58 const float vk42 = weights[23]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local
151 vo0p4 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
205 vo0p4 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
240 vo0p4 += vi4x2 * vk42; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
D5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c68 const v128_t vk42 = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 3, 3, 3, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
177 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
178 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
179 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
180 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
181 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
431 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
432 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
433 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
434 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c68 const v128_t vk42 = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 3, 3, 3, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
177 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
178 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
179 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
180 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
181 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
431 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
432 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
433 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
434 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
[all …]
D5x5p2-minmax-sse-5x4.c60 const __m128 vk42 = _mm_load1_ps(weights + 23); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
158 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
159 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi5x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
160 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi6x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
161 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
162 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi8x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
411 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
412 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi5x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
413 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi6x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
414 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4-acc2.c68 const v128_t vk42 = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 3, 3, 3, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
163 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
164 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
165 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
166 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
384 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
385 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
386 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
387 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
599 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4-acc2.c68 const v128_t vk42 = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 3, 3, 3, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
163 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
164 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
165 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
166 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
384 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
385 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
386 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
387 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
599 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x4567, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
[all …]

1234