/external/XNNPACK/src/f32-dwconv-spchw/
D | 5x5p2-scalar.c | in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar():
      88  float vi1x3;  (local)
      94  vi1x3 = *i1; i1 = (const float*) ((uintptr_t) i1 + input_tuple_stride);
     113  …const float vrow1_accum = vw6 * vi1x0 + vw7 * vi1x1 + vw8 * vi1x2 + vw9 * vi1x3 + vw10 * vi1x4;
     116  vi1x2 = vi1x3;
     117  vi1x3 = vi1x4;
     146  const float vrow1_accum = vw6 * vi1x0 + vw7 * vi1x1 + vw8 * vi1x2 + vw9 * vi1x3;
     149  vi1x2 = vi1x3;

D | 5x5s2p2-scalar.c | in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar():
      91  const float vi1x3 = *i1; i1 = (const float*) ((uintptr_t) i1 + input_tuple_stride);  (local)
     106  …const float vrow1_accum = vw6 * vi1x0 + vw7 * vi1x1 + vw8 * vi1x2 + vw9 * vi1x3 + vw10 * vi1x4;
     108  vi1x1 = vi1x3;
     132  const float vi1x3 = *i1;  (local)
     138  const float vrow1_accum = vw6 * vi1x0 + vw7 * vi1x1 + vw8 * vi1x2 + vw9 * vi1x3;

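Both 5x5 scalar depthwise kernels above keep a window of input columns in plain scalars (vi1x0 ... vi1x4) and, after producing each output, rotate the window (vi1x2 = vi1x3; vi1x3 = vi1x4;) so only the newly exposed column has to be loaded. The following is a minimal sketch of that sliding-window idea for a single row; the names (dot5_row, x0 ... x4) are illustrative, and this is not the XNNPACK kernel, which additionally handles padding, strides, and multiple rows.

    #include <stddef.h>
    #include <stdio.h>

    /* Sketch only: a 1-D five-tap filter over one row (assumes n >= 5),
     * keeping the window in scalars x0..x4 and rotating it per output,
     * mirroring the vi1x2 = vi1x3; vi1x3 = vi1x4; shifts above. */
    static void dot5_row(const float* input, const float w[5], float* output, size_t n) {
      float x0 = input[0], x1 = input[1], x2 = input[2], x3 = input[3];
      for (size_t i = 0; i + 5 <= n; i++) {
        const float x4 = input[i + 4];       /* the only new load per output */
        output[i] = w[0] * x0 + w[1] * x1 + w[2] * x2 + w[3] * x3 + w[4] * x4;
        x0 = x1; x1 = x2; x2 = x3; x3 = x4;  /* rotate the window */
      }
    }

    int main(void) {
      const float in[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
      const float w[5] = { 0.2f, 0.2f, 0.2f, 0.2f, 0.2f };
      float out[4];
      dot5_row(in, w, out, 8);
      printf("%g %g %g %g\n", (double) out[0], (double) out[1], (double) out[2], (double) out[3]);
      return 0;
    }

The rotation means each output needs only one fresh load from memory; the other four taps are reused from the previous iteration.
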
/external/XNNPACK/src/f32-prelu/gen/
D | scalar-2x4.c | in xnn_f32_prelu_ukernel__scalar_2x4():
      63  const float vi1x3 = i1[3];  (local)
      73  float vacc1x3 = signbit(vi1x3) ? vi1x3 * vw3 : vi1x3;

D | wasm-2x4.c | in xnn_f32_prelu_ukernel__wasm_2x4():
      63  const float vi1x3 = i1[3];  (local)
      73  float vacc1x3 = signbit(vi1x3) ? vi1x3 * vw3 : vi1x3;

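The scalar and wasm PReLU microkernels above compute each element as a select on the sign of the input: negative values are scaled by the per-channel slope, non-negative values pass through unchanged. Below is a small standalone sketch of that per-element operation with an illustrative slope value (the real kernels read their slopes vw0..vw3 from the weights pointer).

    #include <math.h>
    #include <stdio.h>

    /* Sketch of the per-element PReLU select used above: multiply by the
     * slope w only when the input is negative. Note that signbit() also
     * classifies -0.0f as negative, unlike an (x < 0.0f) comparison. */
    static float prelu(float x, float w) {
      return signbit(x) ? x * w : x;
    }

    int main(void) {
      const float w = 0.25f;  /* illustrative slope */
      const float in[4] = { -2.0f, -0.0f, 0.0f, 3.0f };
      for (int i = 0; i < 4; i++) {
        printf("prelu(%g) = %g\n", (double) in[i], (double) prelu(in[i], w));
      }
      return 0;
    }
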
/external/XNNPACK/src/f32-conv-hwc2spchw/
D | 3x3s2p1c3x4-neonfma-2x2.c | in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2():
     232  const float32x4_t vi1x3 = vld1q_f32(i1); i1 += 4;  (local)
     246  vo0x1 = vfmaq_laneq_f32(vo0x1, vk11c2, vi1x3, 0);
     267  vo0x1 = vfmaq_laneq_f32(vo0x1, vk12c0, vi1x3, 1);
     288  vo0x1 = vfmaq_laneq_f32(vo0x1, vk12c1, vi1x3, 2);
     309  vo0x1 = vfmaq_laneq_f32(vo0x1, vk12c2, vi1x3, 3);
     320  vi1x0 = vi1x3;
     505  float32x4_t vi1x3 = vmovq_n_f32(0.0f);  (local)
     512  vi1x3 = vld1q_lane_f32(i1 + 8, vi1x3, 0);
     527  vo0x1 = vfmaq_laneq_f32(vo0x1, vk11c2, vi1x3, 0);

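In the NEON FMA HWC kernels above, vi1x3 holds four consecutive floats loaded from input row i1, and vfmaq_laneq_f32(acc, vk, vi1x3, lane) accumulates a whole vector of weights scaled by a single lane of it; each lane is paired with a different weight vector (vk11c2, vk12c0, vk12c1, vk12c2). Here is a hedged sketch of just that lane-broadcast multiply-accumulate with made-up data; it requires an AArch64 compiler, since vfmaq_laneq_f32 is an AArch64 ACLE intrinsic.

    /* AArch64-only sketch of the lane-broadcast FMA used above:
     * acc += weights * input[lane], with lane a compile-time constant. */
    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      const float in[4] = { 1.0f, 2.0f, 3.0f, 4.0f };  /* stands in for vi1x3 */
      const float wt[4] = { 0.5f, 0.5f, 0.5f, 0.5f };  /* one vector of weights */
      float32x4_t vi = vld1q_f32(in);
      float32x4_t vk = vld1q_f32(wt);
      float32x4_t vacc = vmovq_n_f32(0.0f);

      /* Accumulate the weights scaled by lane 1 of the input (the value 2.0f),
       * mirroring e.g. vo0x1 = vfmaq_laneq_f32(vo0x1, vk12c0, vi1x3, 1); */
      vacc = vfmaq_laneq_f32(vacc, vk, vi, 1);

      float out[4];
      vst1q_f32(out, vacc);
      printf("%g %g %g %g\n", (double) out[0], (double) out[1], (double) out[2], (double) out[3]);
      return 0;
    }
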
/external/XNNPACK/src/f32-conv-hwc/
D | 3x3s2p1c3x4-neonfma-2x2.c | in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2():
     213  const float32x4_t vi1x3 = vld1q_f32(i1); i1 += 4;  (local)
     227  vo0x1 = vfmaq_laneq_f32(vo0x1, vk11c2, vi1x3, 0);
     248  vo0x1 = vfmaq_laneq_f32(vo0x1, vk12c0, vi1x3, 1);
     269  vo0x1 = vfmaq_laneq_f32(vo0x1, vk12c1, vi1x3, 2);
     290  vo0x1 = vfmaq_laneq_f32(vo0x1, vk12c2, vi1x3, 3);
     301  vi1x0 = vi1x3;
     509  float32x4_t vi1x3 = vmovq_n_f32(0.0f);  (local)
     516  vi1x3 = vld1q_lane_f32(i1 + 8, vi1x3, 0);
     531  vo0x1 = vfmaq_laneq_f32(vo0x1, vk11c2, vi1x3, 0);

D | 3x3s2p1c3x8-neonfma-2x2.c | in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2():
     308  const float32x4_t vi1x3 = vld1q_f32(i1); i1 += 4;  (local)
     331  vo0x1c0123 = vfmaq_laneq_f32(vo0x1c0123, vk11c2x0123, vi1x3, 0);
     333  vo0x1c4567 = vfmaq_laneq_f32(vo0x1c4567, vk11c2x4567, vi1x3, 0);
     370  vo0x1c0123 = vfmaq_laneq_f32(vo0x1c0123, vk12c0x0123, vi1x3, 1);
     372  vo0x1c4567 = vfmaq_laneq_f32(vo0x1c4567, vk12c0x4567, vi1x3, 1);
     409  vo0x1c0123 = vfmaq_laneq_f32(vo0x1c0123, vk12c1x0123, vi1x3, 2);
     411  vo0x1c4567 = vfmaq_laneq_f32(vo0x1c4567, vk12c1x4567, vi1x3, 2);
     448  vo0x1c0123 = vfmaq_laneq_f32(vo0x1c0123, vk12c2x0123, vi1x3, 3);
     450  vo0x1c4567 = vfmaq_laneq_f32(vo0x1c4567, vk12c2x4567, vi1x3, 3);
     467  vi1x0 = vi1x3;
     [all …]

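The later matches in both 3x3s2p1c3x4 kernels (the vmovq_n_f32 / vld1q_lane_f32 pair) zero-initialize vi1x3 and then fill only lane 0 from memory, a pattern typically used when fewer than four valid values remain at the end of a row, so that lane FMAs on the missing positions contribute nothing. Below is a small AArch64-only sketch of that partial-vector load, again with illustrative names and data.

    /* AArch64-only sketch of the partial-vector load seen above: start from
     * a zero vector and fill only lane 0, leaving lanes 1..3 at zero. */
    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      const float row_tail[1] = { 7.0f };    /* only one valid element remains */
      float32x4_t vi = vmovq_n_f32(0.0f);    /* lanes 1..3 stay zero */
      vi = vld1q_lane_f32(row_tail, vi, 0);  /* load the single valid value into lane 0 */

      float out[4];
      vst1q_f32(out, vi);
      printf("%g %g %g %g\n", (double) out[0], (double) out[1], (double) out[2], (double) out[3]);
      return 0;
    }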