Home
last modified time | relevance | path

Searched refs:vk04 (Results 1 – 25 of 92) sorted by relevance

1234

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
331 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
332 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
333 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
334 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
335 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi4x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
585 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
586 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
587 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
588 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
331 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
332 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
333 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
334 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
335 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi4x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
585 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
586 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
587 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
588 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
[all …]
D5x5p2-minmax-sse-5x4.c42 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
339 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
340 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
341 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
342 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
343 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi4x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
592 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
593 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
594 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
595 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4-acc2.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
295 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
296 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
297 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
298 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
516 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
517 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
518 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
519 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
716 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4-acc2.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
295 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
296 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
297 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
298 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
516 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
517 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
518 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
519 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
716 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4() local
295 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
296 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
297 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
298 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
512 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
513 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
514 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
515 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
708 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4() local
295 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
296 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
297 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
298 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
512 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
513 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
514 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
515 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
708 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-3x4-acc2.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2() local
259 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
260 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
261 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
442 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
443 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
444 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
607 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
608 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
609 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
D5x5p2-minmax-wasmsimd-arm-loadsplat-3x4.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local
259 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
260 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
261 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
439 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
440 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
441 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
601 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
602 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
603 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
D5x5p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local
259 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
260 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
261 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
442 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
443 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
444 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
607 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
608 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
609 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
D5x5p2-minmax-wasmsimd-x86-loadsplat-3x4.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local
259 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
260 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
261 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
439 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
440 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
441 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
601 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
602 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
603 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
D5x5p2-minmax-sse-4x4.c42 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
300 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
301 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
302 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
303 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
514 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
515 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
516 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
517 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
708 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
[all …]
D5x5p2-minmax-sse-4x4-acc2.c42 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
300 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
301 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
302 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
303 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
518 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
519 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
520 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
521 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
716 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
[all …]
D5x5p2-minmax-sse-3x4-acc2.c42 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local
261 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
262 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
263 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
439 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
440 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
441 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
600 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
601 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
602 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
D5x5p2-minmax-sse-3x4.c42 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local
261 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
262 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
263 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
436 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
437 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
438 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
594 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
595 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
596 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi2x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
D5x5p2-minmax-scalar-3x1.c39 const float vk04 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
229 vo0p0 += vi0x4 * vk04; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
230 vo1p0 += vi1x4 * vk04; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
231 vo2p0 += vi2x4 * vk04; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
D5x5p2-minmax-wasmsimd-arm-loadsplat-2x4.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4() local
223 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
224 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
366 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
367 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
494 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
495 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
D5x5p2-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2() local
223 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
224 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
368 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
369 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
498 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
499 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
D5x5p2-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2() local
223 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
224 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
368 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
369 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
498 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
499 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
D5x5p2-minmax-wasmsimd-arm-loadsplat-2x4-acc3.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3() local
223 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
224 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
370 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
371 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
502 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
503 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
D5x5p2-minmax-wasmsimd-x86-loadsplat-2x4-acc3.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3() local
223 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
224 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
370 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
371 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
502 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
503 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
D5x5p2-minmax-wasmsimd-x86-loadsplat-2x4.c50 const v128_t vk04 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4() local
223 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
224 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
366 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
367 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
494 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
495 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
D5x5p2-minmax-scalar-2x1.c39 const float vk04 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
194 vo0p0 += vi0x4 * vk04; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
195 vo1p0 += vi1x4 * vk04; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
D5x5s2p2-minmax-scalar-3x1.c40 const float vk04 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
262 vo0p0 += vi0x4 * vk04; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
263 vo1p0 += vi2x4 * vk04; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
264 vo2p0 += vi4x4 * vk04; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1-acc2.c40 const float vk04 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
262 vo0p0 += vi0x4 * vk04; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
263 vo1p0 += vi2x4 * vk04; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
264 vo2p0 += vi4x4 * vk04; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()

1234