/external/XNNPACK/src/f32-dwconv-spchw/ |
D | 3x3p1-scalar.c | 55 float vi1x1 = *i1; i1 = (const float*) ((uintptr_t) i1 + input_tuple_stride); in xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar() local 67 const float vrow1_accum = vw4 * vi1x0 + vw5 * vi1x1 + vw6 * vi1x2; in xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar() 68 vi1x0 = vi1x1; in xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar() 69 vi1x1 = vi1x2; in xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar() 85 const float vrow1_accum = vw4 * vi1x0 + vw5 * vi1x1; in xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar()
|
D | 5x5p2-scalar.c | 77 float vi1x1 = 0.0f; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() local 113 …const float vrow1_accum = vw6 * vi1x0 + vw7 * vi1x1 + vw8 * vi1x2 + vw9 * vi1x3 + vw10 * vi1x4; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() 114 vi1x0 = vi1x1; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() 115 vi1x1 = vi1x2; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() 146 const float vrow1_accum = vw6 * vi1x0 + vw7 * vi1x1 + vw8 * vi1x2 + vw9 * vi1x3; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() 147 vi1x0 = vi1x1; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() 148 vi1x1 = vi1x2; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() 174 const float vrow1_accum = vw6 * vi1x0 + vw7 * vi1x1 + vw8 * vi1x2; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar()
|
D | 3x3s2p1-scalar.c | 58 const float vi1x1 = *i1; i1 = (const float*) ((uintptr_t) i1 + input_tuple_stride); in xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar() local 66 const float vrow1_accum = vw4 * vi1x0 + vw5 * vi1x1 + vw6 * vi1x2; in xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar() 82 const float vi1x1 = i1[0]; in xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar() local 85 const float vrow1_accum = vw4 * vi1x0 + vw5 * vi1x1; in xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar()
|
D | 5x5s2p2-scalar.c | 77 float vi1x1 = 0.0f; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar() local 106 …const float vrow1_accum = vw6 * vi1x0 + vw7 * vi1x1 + vw8 * vi1x2 + vw9 * vi1x3 + vw10 * vi1x4; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar() 108 vi1x1 = vi1x3; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar() 138 const float vrow1_accum = vw6 * vi1x0 + vw7 * vi1x1 + vw8 * vi1x2 + vw9 * vi1x3; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar() 152 const float vrow1_accum = vw6 * vi1x0 + vw7 * vi1x1 + vw8 * vi1x2; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar()
|
/external/XNNPACK/src/f32-prelu/gen/ |
D | scalar-2x4.c | 61 const float vi1x1 = i1[1]; in xnn_f32_prelu_ukernel__scalar_2x4() local 71 float vacc1x1 = signbit(vi1x1) ? vi1x1 * vw1 : vi1x1; in xnn_f32_prelu_ukernel__scalar_2x4()
|
D | wasm-2x4.c | 61 const float vi1x1 = i1[1]; in xnn_f32_prelu_ukernel__wasm_2x4() local 71 float vacc1x1 = signbit(vi1x1) ? vi1x1 * vw1 : vi1x1; in xnn_f32_prelu_ukernel__wasm_2x4()
|
/external/XNNPACK/src/f32-conv-hwc/ |
D | 3x3s2p1c3x8-neonfma-2x2.c | 99 const float32x4_t vi1x1 = vld1q_f32(i1); i1 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 122 vo0x1c0123 = vfmaq_laneq_f32(vo0x1c0123, vk10c0x0123, vi1x1, 3); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 124 vo0x1c4567 = vfmaq_laneq_f32(vo0x1c4567, vk10c0x4567, vi1x1, 3); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 241 vo0x0c0123 = vfmaq_laneq_f32(vo0x0c0123, vk11c0x0123, vi1x1, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 243 vo0x0c4567 = vfmaq_laneq_f32(vo0x0c4567, vk11c0x4567, vi1x1, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 280 vo0x0c0123 = vfmaq_laneq_f32(vo0x0c0123, vk11c1x0123, vi1x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 282 vo0x0c4567 = vfmaq_laneq_f32(vo0x0c4567, vk11c1x4567, vi1x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 326 vo0x0c0123 = vfmaq_laneq_f32(vo0x0c0123, vk11c2x0123, vi1x1, 2); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 328 vo0x0c4567 = vfmaq_laneq_f32(vo0x0c4567, vk11c2x4567, vi1x1, 2); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 365 vo0x0c0123 = vfmaq_laneq_f32(vo0x0c0123, vk12c0x0123, vi1x1, 3); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 94 const float32x4_t vi1x1 = vld1q_f32(i1); i1 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 108 vo0x1 = vfmaq_laneq_f32(vo0x1, vk10c0, vi1x1, 3); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 176 vo0x0 = vfmaq_laneq_f32(vo0x0, vk11c0, vi1x1, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 197 vo0x0 = vfmaq_laneq_f32(vo0x0, vk11c1, vi1x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 225 vo0x0 = vfmaq_laneq_f32(vo0x0, vk11c2, vi1x1, 2); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 246 vo0x0 = vfmaq_laneq_f32(vo0x0, vk12c0, vi1x1, 3); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 366 float32x4_t vi1x1 = vld1q_f32(i1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 383 vo0x1 = vfmaq_laneq_f32(vo0x1, vk10c0, vi1x1, 3); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 463 vo0x0 = vfmaq_laneq_f32(vo0x0, vk11c0, vi1x1, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 490 vo0x0 = vfmaq_laneq_f32(vo0x0, vk11c1, vi1x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
/external/XNNPACK/src/f32-conv-hwc2spchw/ |
D | 3x3s2p1c3x4-neonfma-2x2.c | 113 const float32x4_t vi1x1 = vld1q_f32(i1); i1 += 4; in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() local 127 vo0x1 = vfmaq_laneq_f32(vo0x1, vk10c0, vi1x1, 3); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 195 vo0x0 = vfmaq_laneq_f32(vo0x0, vk11c0, vi1x1, 0); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 216 vo0x0 = vfmaq_laneq_f32(vo0x0, vk11c1, vi1x1, 1); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 244 vo0x0 = vfmaq_laneq_f32(vo0x0, vk11c2, vi1x1, 2); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 265 vo0x0 = vfmaq_laneq_f32(vo0x0, vk12c0, vi1x1, 3); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 362 float32x4_t vi1x1 = vld1q_f32(i1); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() local 379 vo0x1 = vfmaq_laneq_f32(vo0x1, vk10c0, vi1x1, 3); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 459 vo0x0 = vfmaq_laneq_f32(vo0x0, vk11c0, vi1x1, 0); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 486 vo0x0 = vfmaq_laneq_f32(vo0x0, vk11c1, vi1x1, 1); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
/external/XNNPACK/src/f32-dwconv/gen/ |
D | up2x4-scalar.c | 59 const float vi1x1 = i1[1]; in xnn_f32_dwconv_ukernel_up2x4__scalar() local 65 vacc1p0 += vi1x1 * vk1x1; in xnn_f32_dwconv_ukernel_up2x4__scalar()
|
D | up2x4-wasm.c | 59 const float vi1x1 = i1[1]; in xnn_f32_dwconv_ukernel_up2x4__wasm() local 65 vacc1p0 += vi1x1 * vk1x1; in xnn_f32_dwconv_ukernel_up2x4__wasm()
|
D | up2x4-wasm-acc2.c | 59 const float vi1x1 = i1[1]; in xnn_f32_dwconv_ukernel_up2x4__wasm_acc2() local 65 float vacc1p1 = vi1x1 * vk1x1; in xnn_f32_dwconv_ukernel_up2x4__wasm_acc2()
|
D | up2x4-scalar-acc2.c | 59 const float vi1x1 = i1[1]; in xnn_f32_dwconv_ukernel_up2x4__scalar_acc2() local 65 float vacc1p1 = vi1x1 * vk1x1; in xnn_f32_dwconv_ukernel_up2x4__scalar_acc2()
|
D | up2x9-wasm.c | 69 const float vi1x1 = i1[1]; in xnn_f32_dwconv_ukernel_up2x9__wasm() local 75 vacc1p0 += vi1x1 * vk1x1; in xnn_f32_dwconv_ukernel_up2x9__wasm()
|
D | up2x9-scalar.c | 69 const float vi1x1 = i1[1]; in xnn_f32_dwconv_ukernel_up2x9__scalar() local 75 vacc1p0 += vi1x1 * vk1x1; in xnn_f32_dwconv_ukernel_up2x9__scalar()
|
D | up2x9-scalar-acc2.c | 69 const float vi1x1 = i1[1]; in xnn_f32_dwconv_ukernel_up2x9__scalar_acc2() local 75 float vacc1p1 = vi1x1 * vk1x1; in xnn_f32_dwconv_ukernel_up2x9__scalar_acc2()
|
D | up2x9-wasm-acc2.c | 69 const float vi1x1 = i1[1]; in xnn_f32_dwconv_ukernel_up2x9__wasm_acc2() local 75 float vacc1p1 = vi1x1 * vk1x1; in xnn_f32_dwconv_ukernel_up2x9__wasm_acc2()
|
D | up2x25-scalar.c | 101 const float vi1x1 = i1[1]; in xnn_f32_dwconv_ukernel_up2x25__scalar() local 107 vacc1p0 += vi1x1 * vk1x1; in xnn_f32_dwconv_ukernel_up2x25__scalar()
|
D | up2x25-scalar-acc2.c | 101 const float vi1x1 = i1[1]; in xnn_f32_dwconv_ukernel_up2x25__scalar_acc2() local 107 float vacc1p1 = vi1x1 * vk1x1; in xnn_f32_dwconv_ukernel_up2x25__scalar_acc2()
|
D | up2x25-wasm-acc2.c | 101 const float vi1x1 = i1[1]; in xnn_f32_dwconv_ukernel_up2x25__wasm_acc2() local 107 float vacc1p1 = vi1x1 * vk1x1; in xnn_f32_dwconv_ukernel_up2x25__wasm_acc2()
|
D | up2x25-wasm.c | 101 const float vi1x1 = i1[1]; in xnn_f32_dwconv_ukernel_up2x25__wasm() local 107 vacc1p0 += vi1x1 * vk1x1; in xnn_f32_dwconv_ukernel_up2x25__wasm()
|