/external/XNNPACK/src/f16-dwconv2d-chw/gen/
D | 5x5p2-minmax-neonfp16arith-5x4.c |
    162  const float16x4_t vi8x3456 = vext_f16(vi8x0123, vi8x4567, 3);  in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4() local
    192  vo4p0 = vfma_laneq_f16(vo4p0, vi8x3456, vwGHIJKLMN, 6);  in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4()
    417  const float16x4_t vi8x3456 = vext_f16(vi8x0123, vi8x4567, 3);  in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4() local
    447  vo4p0 = vfma_laneq_f16(vo4p0, vi8x3456, vwGHIJKLMN, 6);  in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4()
    665  const float16x4_t vi8x3456 = vext_f16(vi8x0123, vi8x4567, 3);  in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4() local
    695  vo4p0 = vfma_laneq_f16(vo4p0, vi8x3456, vwGHIJKLMN, 6);  in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4()

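In the f16 kernel the recurring pattern at these hits is: vext_f16 splices the last element of vi8x0123 with the first three elements of vi8x4567 to form the shifted window vi8x3456, and vfma_laneq_f16 multiply-accumulates that window against lane 6 of the packed weight register vwGHIJKLMN. A minimal, self-contained sketch of that idiom (illustrative only, not the generated XNNPACK source; the wrapper function name and argument set are invented for the example):

    #include <arm_neon.h>  /* requires __ARM_FEATURE_FP16_VECTOR_ARITHMETIC (ARMv8.2-A FP16) */

    static float16x4_t row8_tap_f16(float16x4_t vo4p0,
                                    float16x4_t vi8x0123, float16x4_t vi8x4567,
                                    float16x8_t vwGHIJKLMN) {
      /* Shifted window: last element of vi8x0123 followed by the first three of vi8x4567,
         i.e. elements 3..6 of input row 8. */
      const float16x4_t vi8x3456 = vext_f16(vi8x0123, vi8x4567, 3);
      /* Fused multiply-accumulate against lane 6 of the 8-wide weight register. */
      return vfma_laneq_f16(vo4p0, vi8x3456, vwGHIJKLMN, 6);
    }
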
/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c |
    191  const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
    221  vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, vk41));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
    445  const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
    475  vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, vk41));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
    692  const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
    722  vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, vk41));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()

D | 5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c |
    191  const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
    221  vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, vk41));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
    445  const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
    475  vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, vk41));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
    692  const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
    722  vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, vk41));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()

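The two WAsm SIMD loadsplat variants above show identical hits (the x86/arm suffix presumably only affects how the min/max clamping elsewhere in the kernel is lowered): a cross-vector shuffle forms vi8x3456, and since WebAssembly SIMD128 has no fused multiply-add the tap is applied as an explicit multiply followed by an add against the pre-splatted weight vk41. A minimal sketch of the idiom (illustrative only; the wrapper function is invented for the example):

    #include <wasm_simd128.h>

    static v128_t row8_tap_wasm_loadsplat(v128_t vo4p0,
                                          v128_t vi8x0123, v128_t vi8x4567,
                                          v128_t vk41) {
      /* Lanes 3..6 across the two input vectors: lane 3 of vi8x0123, lanes 0..2 of vi8x4567. */
      const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);
      /* No FMA in wasm SIMD128: multiply by the splatted weight, then add into the accumulator. */
      return wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, vk41));
    }
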
D | 5x5p2-minmax-neon-5x4.c |
    164  const float32x4_t vi8x3456 = vextq_f32(vi8x0123, vi8x4567, 3);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
    194  vo4p0 = vmlaq_lane_f32(vo4p0, vi8x3456, vget_high_f32(vwKLMN), 0);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
    419  const float32x4_t vi8x3456 = vextq_f32(vi8x0123, vi8x4567, 3);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
    449  vo4p0 = vmlaq_lane_f32(vo4p0, vi8x3456, vget_high_f32(vwKLMN), 0);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
    667  const float32x4_t vi8x3456 = vextq_f32(vi8x0123, vi8x4567, 3);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
    697  vo4p0 = vmlaq_lane_f32(vo4p0, vi8x3456, vget_high_f32(vwKLMN), 0);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()

D | 5x5p2-minmax-neonfma-5x4.c |
    164  const float32x4_t vi8x3456 = vextq_f32(vi8x0123, vi8x4567, 3);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
    194  vo4p0 = vfmaq_lane_f32(vo4p0, vi8x3456, vget_high_f32(vwKLMN), 0);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
    419  const float32x4_t vi8x3456 = vextq_f32(vi8x0123, vi8x4567, 3);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
    449  vo4p0 = vfmaq_lane_f32(vo4p0, vi8x3456, vget_high_f32(vwKLMN), 0);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
    667  const float32x4_t vi8x3456 = vextq_f32(vi8x0123, vi8x4567, 3);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
    697  vo4p0 = vfmaq_lane_f32(vo4p0, vi8x3456, vget_high_f32(vwKLMN), 0);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()

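The neon and neonfma variants above differ only in the accumulate intrinsic: both build vi8x3456 with vextq_f32, then the neon kernel uses the non-fused vmlaq_lane_f32 while the neonfma kernel uses the fused vfmaq_lane_f32, in each case taking the weight from lane 0 of the high half of vwKLMN, i.e. vwKLMN[2]. A minimal sketch covering both (illustrative only; the __ARM_FEATURE_FMA switch is just how this example selects between them, not how the generated files are organized):

    #include <arm_neon.h>

    static float32x4_t row8_tap_neon(float32x4_t vo4p0,
                                     float32x4_t vi8x0123, float32x4_t vi8x4567,
                                     float32x4_t vwKLMN) {
      /* Elements 3..6 of input row 8, spliced from the two aligned loads. */
      const float32x4_t vi8x3456 = vextq_f32(vi8x0123, vi8x4567, 3);
    #if defined(__ARM_FEATURE_FMA)
      /* neonfma variant: fused multiply-add against vwKLMN[2] (lane 0 of the high half). */
      return vfmaq_lane_f32(vo4p0, vi8x3456, vget_high_f32(vwKLMN), 0);
    #else
      /* neon variant: non-fused multiply-accumulate against the same lane. */
      return vmlaq_lane_f32(vo4p0, vi8x3456, vget_high_f32(vwKLMN), 0);
    #endif
    }
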
D | 5x5p2-minmax-sse-5x4.c |
    201  const __m128 vi8x3456 = _mm_move_ss(vi8x7456, vi8x3012);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
    227  vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi8x3456, vk41));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
    454  const __m128 vi8x3456 = _mm_move_ss(vi8x7456, vi8x3012);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
    480  vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi8x3456, vk41));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
    701  const __m128 vi8x3456 = _mm_move_ss(vi8x7456, vi8x3012);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
    727  vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi8x3456, vk41));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()

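SSE has no two-register lane extract like vextq_f32, so the sse kernel assembles vi8x3456 with _mm_move_ss from two rotated helpers, vi8x7456 and vi8x3012, which are defined outside the hits shown above. The sketch below reconstructs those helpers with _mm_shuffle_ps on the assumption that they are single-lane rotations of vi8x4567 and vi8x0123, as their names suggest (illustrative only, not copied from the generated source):

    #include <xmmintrin.h>

    static __m128 row8_tap_sse(__m128 vo4p0,
                               __m128 vi8x0123, __m128 vi8x4567,
                               __m128 vk41) {
      /* Rotate each input vector by one lane: [i3 i0 i1 i2] and [i7 i4 i5 i6].
         These helpers are assumed; the hits above only show the _mm_move_ss step. */
      const __m128 vi8x3012 = _mm_shuffle_ps(vi8x0123, vi8x0123, _MM_SHUFFLE(2, 1, 0, 3));
      const __m128 vi8x7456 = _mm_shuffle_ps(vi8x4567, vi8x4567, _MM_SHUFFLE(2, 1, 0, 3));
      /* _mm_move_ss takes lane 0 from the second operand and lanes 1..3 from the first,
         yielding [i3 i4 i5 i6]. */
      const __m128 vi8x3456 = _mm_move_ss(vi8x7456, vi8x3012);
      /* No FMA in plain SSE: multiply by the broadcast kernel tap, then add. */
      return _mm_add_ps(vo4p0, _mm_mul_ps(vi8x3456, vk41));
    }
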
D | 5x5p2-minmax-wasmsimd-x86-splat-5x4.c |
    165  const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local
    195  …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2,…  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
    419  const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local
    449  …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2,…  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
    666  const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local
    696  …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2,…  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()

D | 5x5p2-minmax-wasmsimd-arm-splat-5x4.c |
    165  const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local
    195  …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2,…  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
    419  const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local
    449  …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2,…  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
    666  const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local
    696  …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2,…  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()

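Unlike the loadsplat variants, the two splat variants above (again identical hit-for-hit) keep the taps packed in vwKLMN and broadcast the needed lane at the point of use with a self-shuffle. The accumulate lines are truncated by the indexer; the visible "2, 2, 2," prefix reads as a lane-2 broadcast, so the sketch below completes the shuffle mask as 2, 2, 2, 2 as an assumption rather than a quotation of the source:

    #include <wasm_simd128.h>

    static v128_t row8_tap_wasm_splat(v128_t vo4p0,
                                      v128_t vi8x0123, v128_t vi8x4567,
                                      v128_t vwKLMN) {
      /* Elements 3..6 of input row 8. */
      const v128_t vi8x3456 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 3, 4, 5, 6);
      /* Broadcast lane 2 of the packed weights with a self-shuffle (mask assumed to be
         2, 2, 2, 2; the search hit is truncated), then multiply and add: no FMA in SIMD128. */
      const v128_t vw = wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2, 2);
      return wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x3456, vw));
    }
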