/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-scalar-2x1-acc2.c | 64 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() local 81 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 82 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 88 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 127 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 128 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
|
D | 3x3p1-minmax-scalar-2x1.c | 64 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() local 81 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 82 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 88 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 125 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 126 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
|
D | 5x5p2-minmax-scalar-2x1.c | 85 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local 123 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 124 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 134 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 218 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 219 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 229 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 297 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 298 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
|
D | 3x3p1-minmax-scalar-1x1-acc2.c | 58 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2() local 72 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2() 76 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2() 103 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
|
D | 3x3p1-minmax-scalar-1x1.c | 58 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1() local 72 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1() 76 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1() 102 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
|
D | 3x3p1-minmax-scalar-1x1-acc3.c | 58 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() local 72 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() 76 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() 104 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
|
D | 3x3p1-minmax-scalar-3x1.c | 70 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local 90 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 92 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 100 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 148 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 150 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
|
D | 3x3p1-minmax-scalar-1x1-acc4.c | 58 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc4() local 72 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc4() 76 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc4() 105 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc4()
|
D | 5x5p2-minmax-scalar-2x1-acc3.c | 85 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 123 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 124 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 134 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 222 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 223 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 233 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 305 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 306 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
|
D | 5x5p2-minmax-scalar-2x1-acc2.c | 85 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local 123 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 124 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 134 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 220 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 221 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 231 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 301 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 302 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
|
D | 3x3s2p1-minmax-scalar-1x1-acc2.c | 64 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2() local 74 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2() 89 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2() 112 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2()
|
D | 3x3s2p1-minmax-scalar-1x1.c | 64 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1() local 74 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1() 89 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1() 111 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1()
|
D | 3x3s2p1-minmax-scalar-1x1-acc4.c | 64 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc4() local 74 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc4() 89 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc4() 114 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc4()
|
D | 3x3s2p1-minmax-scalar-1x1-acc3.c | 64 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc3() local 74 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc3() 89 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc3() 113 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc3()
|
D | 5x5p2-minmax-scalar-1x1-acc4.c | 79 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local 112 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 118 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 178 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 184 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 234 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
|
D | 5x5p2-minmax-scalar-1x1-acc3.c | 79 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local 112 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 118 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 177 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 183 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 232 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
|
D | 5x5p2-minmax-scalar-1x1.c | 79 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local 112 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 118 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 175 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 181 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 228 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
|
D | 5x5p2-minmax-scalar-1x1-acc2.c | 79 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local 112 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 118 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 176 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 182 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 230 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
|
D | 5x5p2-minmax-scalar-1x1-acc5.c | 79 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local 112 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 118 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 179 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 185 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 236 float vo0p1 = vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
|
D | 3x3p1-minmax-scalar-4x1.c | 76 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local 99 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 102 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 112 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 171 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 174 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
|
D | 3x3p1-minmax-scalar-5x1.c | 82 float vi1x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local 108 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 112 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 124 vi1x0 = vi1x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 194 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 198 vo0p0 += vi1x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
|
/external/XNNPACK/src/f32-prelu/gen/ |
D | wasm-2x4.c | 56 float vi1x0 = i1[0]; in xnn_f32_prelu_ukernel__wasm_2x4() local 70 float vacc1x0 = __builtin_wasm_max_f32(vi1x0, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 71 vi1x0 = __builtin_wasm_min_f32(vi1x0, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 83 vacc1x0 += vi1x0 * vw0; in xnn_f32_prelu_ukernel__wasm_2x4()
|
D | scalar-2x4.c | 55 const float vi1x0 = i1[0]; in xnn_f32_prelu_ukernel__scalar_2x4() local 65 const float vacc1x0 = XNN_UNPREDICTABLE(vi1x0 < 0.0f) ? vi1x0 * vw0 : vi1x0; in xnn_f32_prelu_ukernel__scalar_2x4()
|
/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 86 float32x4_t vi1x0 = vld1q_f32(i1); i1 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 109 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 111 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c0x4567, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 133 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 135 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c1x4567, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 157 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c2x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 159 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c2x4567, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 181 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c0x0123, vget_high_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 183 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk11c0x4567, vget_high_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 329 vi1x0 = vcombine_f32(vget_high_f32(vi1x1), vi1x2); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 84 float32x4_t vi1x0 = vld1q_f32(i1); i1 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 107 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 109 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c0x4567, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 131 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 133 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c1x4567, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 155 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c2x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 157 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c2x4567, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 179 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c0x0123, vget_high_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 181 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk11c0x4567, vget_high_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 327 vi1x0 = vcombine_f32(vget_high_f32(vi1x1), vi1x2); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() [all …]
|