/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5s2p2-minmax-scalar-1x1-acc5.c | 127 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local 193 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local 228 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local
|
D | 5x5p2-minmax-scalar-1x1-acc5.c | 115 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local 182 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local 239 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local
|
D | 5x5p2-minmax-wasmsimd-arm-splat-1x4-acc5.c | 93 v128_t vo0p4 = wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2, 2)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc5() local 203 v128_t vo0p4 = wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2, 2)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc5() local 310 v128_t vo0p4 = wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2, 2)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc5() local
|
D | 5x5p2-minmax-wasmsimd-x86-splat-1x4-acc5.c | 93 v128_t vo0p4 = wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2, 2)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4_acc5() local 203 v128_t vo0p4 = wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2, 2)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4_acc5() local 310 v128_t vo0p4 = wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2, 2)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4_acc5() local
|
D | 5x5p2-minmax-neonfma-1x4-acc5.c | 92 float32x4_t vo0p4 = vmulq_lane_f32(vi3x4567, vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc5() local 203 float32x4_t vo0p4 = vmulq_lane_f32(vi3x4567, vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc5() local 311 float32x4_t vo0p4 = vmulq_lane_f32(vi3x4567, vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc5() local
|
D | 5x5p2-minmax-neon-1x4-acc5.c | 92 float32x4_t vo0p4 = vmulq_lane_f32(vi3x4567, vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc5() local 203 float32x4_t vo0p4 = vmulq_lane_f32(vi3x4567, vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc5() local 311 float32x4_t vo0p4 = vmulq_lane_f32(vi3x4567, vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc5() local
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-1x4-acc5.c | 119 v128_t vo0p4 = wasm_f32x4_mul(vi3x4567, vk32); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_1x4_acc5() local 229 v128_t vo0p4 = wasm_f32x4_mul(vi3x4567, vk32); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_1x4_acc5() local 336 v128_t vo0p4 = wasm_f32x4_mul(vi3x4567, vk32); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_1x4_acc5() local
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-1x4-acc5.c | 119 v128_t vo0p4 = wasm_f32x4_mul(vi3x4567, vk32); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_1x4_acc5() local 229 v128_t vo0p4 = wasm_f32x4_mul(vi3x4567, vk32); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_1x4_acc5() local 336 v128_t vo0p4 = wasm_f32x4_mul(vi3x4567, vk32); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_1x4_acc5() local
|
D | 5x5p2-minmax-sse-1x4-acc5.c | 106 __m128 vo0p4 = _mm_mul_ps(vi4x4567, vk42); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() local 207 __m128 vo0p4 = _mm_mul_ps(vi4x4567, vk42); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() local 317 __m128 vo0p4 = _mm_mul_ps(vi4x4567, vk42); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() local
|
D | 5x5s2p2-minmax-neonfma-1x4-acc5.c | 100 float32x4_t vo0p4 = vmulq_lane_f32(vi3x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() local 218 float32x4_t vo0p4 = vmulq_lane_f32(vi3x8ACE, vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() local
|
D | 5x5s2p2-minmax-neon-1x4-acc5.c | 100 float32x4_t vo0p4 = vmulq_lane_f32(vi3x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() local 218 float32x4_t vo0p4 = vmulq_lane_f32(vi3x8ACE, vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() local
|
D | 5x5s2p2-minmax-wasmsimd-arm-splat-1x4-acc5.c | 122 v128_t vo0p4 = wasm_f32x4_mul(vi3x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2, 2)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_1x4_acc5() local 265 v128_t vo0p4 = wasm_f32x4_mul(vi3x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2, 2)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_1x4_acc5() local
|
D | 5x5s2p2-minmax-wasmsimd-x86-splat-1x4-acc5.c | 122 v128_t vo0p4 = wasm_f32x4_mul(vi3x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2, 2)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_1x4_acc5() local 265 v128_t vo0p4 = wasm_f32x4_mul(vi3x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2, 2)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_1x4_acc5() local
|
D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-1x4-acc5.c | 142 v128_t vo0p4 = wasm_f32x4_mul(vi3x8ACE, vk32); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_1x4_acc5() local 285 v128_t vo0p4 = wasm_f32x4_mul(vi3x8ACE, vk32); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_1x4_acc5() local
|
D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4-acc5.c | 142 v128_t vo0p4 = wasm_f32x4_mul(vi3x8ACE, vk32); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_1x4_acc5() local 285 v128_t vo0p4 = wasm_f32x4_mul(vi3x8ACE, vk32); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_1x4_acc5() local
|
D | 5x5s2p2-minmax-sse-1x4-acc5.c | 136 __m128 vo0p4 = _mm_mul_ps(vi4x8ACE, vk42); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5() local 276 __m128 vo0p4 = _mm_mul_ps(vi4x8ACE, vk42); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5() local
|
/external/XNNPACK/src/f16-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-neonfp16arith-1x4-acc5.c | 90 float16x4_t vo0p4 = vmul_laneq_f16(vi3x4567, vwGHIJKLMN, 2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5() local 201 float16x4_t vo0p4 = vmul_laneq_f16(vi3x4567, vwGHIJKLMN, 2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5() local 309 float16x4_t vo0p4 = vmul_laneq_f16(vi3x4567, vwGHIJKLMN, 2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5() local
|
D | 5x5s2p2-minmax-neonfp16arith-1x4-acc5.c | 98 float16x4_t vo0p4 = vmul_laneq_f16(vi3x8ACE9BDF.val[0], vwGHIJKLMN, 2); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5() local 216 float16x4_t vo0p4 = vmul_laneq_f16(vi3x8ACE, vwGHIJKLMN, 2); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5() local
|