/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3s2p1-minmax-neon-1x4-acc4.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() 132 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc2.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() 130 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc4.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() 132 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
|
D | 3x3s2p1-minmax-neon-1x4-acc3.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() 131 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
|
D | 3x3s2p1-minmax-neon-1x4-acc2.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() 130 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc3.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() 131 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
|
D | 3x3s2p1-minmax-neonfma-1x4.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() 129 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
|
D | 3x3s2p1-minmax-neon-1x4.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() 129 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc2.c | 64 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() local 97 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() 98 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() 146 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c | 74 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc2() local 107 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc2() 108 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc2() 156 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-1x4-acc2.c | 64 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc2() local 97 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc2() 98 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc2() 146 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-1x4-acc4.c | 64 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4() local 97 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4() 98 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4() 148 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4.c | 74 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() local 107 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() 108 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() 155 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c | 74 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() local 107 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() 108 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() 156 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-1x4.c | 74 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4() local 107 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4() 108 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4() 155 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-1x4.c | 64 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4() local 97 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4() 98 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4() 145 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc3.c | 64 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() local 97 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() 98 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() 147 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4.c | 64 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() local 97 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() 98 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() 145 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc4.c | 64 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() local 97 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() 98 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() 148 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-1x4-acc3.c | 64 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc3() local 97 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc3() 98 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc3() 147 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc3()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c | 74 v128_t vi2x1357 = wasm_f32x4_const_splat(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() local 107 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() 108 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() 157 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3()
|
/external/XNNPACK/src/f16-dwconv2d-chw/gen/ |
D | 3x3s2p1-minmax-neonfp16arith-1x4.c | 63 float16x4_t vi2x1357 = vmov_n_f16(0); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4() local 83 const float16x4_t vi2x7BDF = vext_f16(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4() 129 const float16x4_t vi2x7BDF = vext_f16(vi2x1357, vi2x9BDF, 3); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4()
|
D | 3x3s2p1-minmax-neonfp16arith-1x4-acc3.c | 63 float16x4_t vi2x1357 = vmov_n_f16(0); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3() local 83 const float16x4_t vi2x7BDF = vext_f16(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3() 131 const float16x4_t vi2x7BDF = vext_f16(vi2x1357, vi2x9BDF, 3); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3()
|
D | 3x3s2p1-minmax-neonfp16arith-1x4-acc2.c | 63 float16x4_t vi2x1357 = vmov_n_f16(0); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2() local 83 const float16x4_t vi2x7BDF = vext_f16(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2() 130 const float16x4_t vi2x7BDF = vext_f16(vi2x1357, vi2x9BDF, 3); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2()
|
D | 3x3s2p1-minmax-neonfp16arith-1x4-acc4.c | 63 float16x4_t vi2x1357 = vmov_n_f16(0); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4() local 83 const float16x4_t vi2x7BDF = vext_f16(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4() 132 const float16x4_t vi2x7BDF = vext_f16(vi2x1357, vi2x9BDF, 3); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
|