/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-scalar-2x1.c | 81 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() local 125 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() local
|
D | 5x5p2-minmax-scalar-2x1-acc2.c | 123 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local 220 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local 301 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
|
D | 5x5p2-minmax-scalar-2x1.c | 123 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local 218 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local 297 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
|
D | 3x3p1-minmax-scalar-2x1-acc2.c | 81 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() local 127 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() local
|
D | 5x5p2-minmax-scalar-2x1-acc3.c | 123 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 222 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 305 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
|
D | 5x5p2-minmax-scalar-3x1.c | 134 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 261 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 366 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
|
D | 3x3p1-minmax-scalar-3x1.c | 90 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local 148 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
|
D | 5x5p2-minmax-scalar-3x1-acc2.c | 134 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 264 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 372 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
|
D | 5x5s2p2-minmax-scalar-2x1-acc2.c | 147 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local 247 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local 303 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local
|
D | 5x5s2p2-minmax-scalar-2x1.c | 147 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local 245 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local 299 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
|
D | 3x3p1-minmax-neonfma-2x4.c | 74 float32x4_t vo1p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4() local 143 float32x4_t vo1p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4() local
|
D | 3x3p1-minmax-neon-2x4.c | 74 float32x4_t vo1p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4() local 143 float32x4_t vo1p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4() local
|
D | 3x3p1-minmax-wasmsimd-x86-splat-2x4.c | 77 v128_t vo1p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4() local 145 v128_t vo1p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4() local
|
D | 3x3p1-minmax-wasmsimd-arm-splat-2x4.c | 77 v128_t vo1p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4() local 145 v128_t vo1p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4() local
|
D | 3x3s2p1-minmax-scalar-2x1.c | 89 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() local 146 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() local
|
D | 3x3s2p1-minmax-scalar-2x1-acc2.c | 89 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() local 148 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() local
|
D | 3x3p1-minmax-neonfma-2x4-acc2.c | 74 float32x4_t vo1p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2() local 145 float32x4_t vo1p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2() local
|
D | 3x3p1-minmax-neon-2x4-acc2.c | 74 float32x4_t vo1p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2() local 145 float32x4_t vo1p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2() local
|
D | 3x3p1-minmax-wasmsimd-arm-splat-2x4-acc2.c | 77 v128_t vo1p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4_acc2() local 147 v128_t vo1p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4_acc2() local
|
D | 3x3p1-minmax-wasmsimd-x86-splat-2x4-acc2.c | 77 v128_t vo1p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4_acc2() local 147 v128_t vo1p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4_acc2() local
|
D | 3x3p1-minmax-scalar-4x1.c | 99 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local 171 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
|
D | 5x5s2p2-minmax-scalar-2x1-acc3.c | 147 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local 249 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local 307 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-2x4.c | 101 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4() local 162 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4() local
|
D | 3x3p1-minmax-ssse3-2x4.c | 94 __m128 vo1p0 = _mm_add_ps(vbias, _mm_mul_ps(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() local 156 __m128 vo1p0 = _mm_add_ps(vbias, _mm_mul_ps(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() local
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-2x4.c | 101 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4() local 162 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4() local
|