/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-scalar-5x1.c | 111 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local 197 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
|
D | 3x3p1-minmax-scalar-6x1.c | 120 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local 220 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local
|
D | 3x3p1-minmax-wasmsimd-arm-splat-5x4.c | 102 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4() local 224 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4() local
|
D | 3x3p1-minmax-wasmsimd-x86-splat-5x4.c | 102 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4() local 224 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4() local
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-5x4.c | 135 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local 250 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-5x4.c | 135 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() local 250 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() local
|
D | 3x3p1-minmax-ssse3-5x4.c | 130 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() local 246 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() local
|
D | 3x3p1-minmax-neon-5x4.c | 101 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4() local 224 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4() local
|
D | 3x3p1-minmax-neonfma-5x4.c | 101 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4() local 224 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4() local
|
D | 3x3p1-minmax-wasmsimd-arm-splat-6x4.c | 110 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_6x4() local 250 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_6x4() local
|
D | 3x3p1-minmax-neon-6x4.c | 109 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4() local 250 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4() local
|
D | 3x3p1-minmax-wasmsimd-x86-splat-6x4.c | 110 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_6x4() local 250 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_6x4() local
|
D | 3x3p1-minmax-neonfma-6x4.c | 109 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4() local 250 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4() local
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-6x4.c | 146 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local 279 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local
|
D | 3x3p1-minmax-ssse3-6x4.c | 141 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local 275 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-6x4.c | 146 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local 279 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local
|
D | 5x5p2-minmax-wasmsimd-arm-splat-5x4.c | 115 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local 359 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local 616 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local
|
D | 5x5p2-minmax-wasmsimd-x86-splat-5x4.c | 115 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local 359 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local 616 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local
|
D | 3x3p1-minmax-sse-5x4.c | 159 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local 319 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local
|
D | 5x5p2-minmax-neon-5x4.c | 114 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local 359 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local 617 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
|
D | 5x5p2-minmax-neonfma-5x4.c | 114 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local 359 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local 617 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c | 141 v128_t vo4p0 = vbias; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local 385 v128_t vo4p0 = vbias; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local 642 v128_t vo4p0 = vbias; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
|
/external/XNNPACK/src/f16-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-neonfp16arith-5x8.c | 101 float16x8_t vo4p0 = vdupq_lane_f16(vget_low_f16(vw01234567), 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() local 227 float16x8_t vo4p0 = vdupq_lane_f16(vget_low_f16(vw01234567), 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() local
|
D | 3x3p1-minmax-neonfp16arith-6x8.c | 109 float16x8_t vo4p0 = vdupq_lane_f16(vget_low_f16(vw01234567), 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() local 253 float16x8_t vo4p0 = vdupq_lane_f16(vget_low_f16(vw01234567), 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() local
|
D | 5x5p2-minmax-neonfp16arith-5x4.c | 112 float16x4_t vo4p0 = vdup_laneq_f16(vw01234567, 0); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4() local 357 float16x4_t vo4p0 = vdup_laneq_f16(vw01234567, 0); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4() local 615 float16x4_t vo4p0 = vdup_laneq_f16(vw01234567, 0); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4() local
|