
Searched refs:vadd_f16 (Results 1 – 25 of 55) sorted by relevance


/external/ComputeLibrary/src/core/NEON/kernels/convolution/winograd/output_transforms/
a64_fp16_4x4_3x3.cpp
152 … FZ[i][0] = vadd_f16(vadd_f16(vadd_f16(F[i][0], F[i][1]), vadd_f16(F[i][2], F[i][3])), F[i][4]); in a64_fp16_4x4_3x3()
155 …FZ[i][1] = vadd_f16(vsub_f16(F[i][1], F[i][2]), vmul_f16(vsub_f16(F[i][3], F[i][4]), vdup_n_f16(2.… in a64_fp16_4x4_3x3()
158 …FZ[i][2] = vadd_f16(vadd_f16(F[i][1], F[i][2]), vmul_f16(vadd_f16(F[i][3], F[i][4]), vdup_n_f16(4.… in a64_fp16_4x4_3x3()
161 …FZ[i][3] = vadd_f16(vadd_f16(vsub_f16(F[i][1], F[i][2]), vmul_f16(vsub_f16(F[i][3], F[i][4]), vdup… in a64_fp16_4x4_3x3()
168 …f[0][j] = vadd_f16(vadd_f16(vadd_f16(FZ[0][j], FZ[1][j]), vadd_f16(FZ[2][j], FZ[3][j])), FZ[4][j]); in a64_fp16_4x4_3x3()
171 …f[1][j] = vadd_f16(vsub_f16(FZ[1][j], FZ[2][j]), vmul_f16(vsub_f16(FZ[3][j], FZ[4][j]), vdup_n_f16… in a64_fp16_4x4_3x3()
174 …f[2][j] = vadd_f16(vadd_f16(FZ[1][j], FZ[2][j]), vmul_f16(vadd_f16(FZ[3][j], FZ[4][j]), vdup_n_f16… in a64_fp16_4x4_3x3()
177 …f[3][j] = vadd_f16(vadd_f16(vsub_f16(FZ[1][j], FZ[2][j]), vmul_f16(vsub_f16(FZ[3][j], FZ[4][j]), v… in a64_fp16_4x4_3x3()
195 vmax_f16(vmin_f16(vadd_f16(f[i][j], b), vdup_n_f16(output_max)), in a64_fp16_4x4_3x3()
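These output-transform matches build each row from nested vadd_f16/vsub_f16/vmul_f16 calls with vdup_n_f16 broadcast constants, then clamp the biased result with vmin_f16/vmax_f16. A minimal sketch of the idiom (hypothetical helper names, assuming an AArch64 build with -march=armv8.2-a+fp16; the lower clamp bound is truncated above, so output_min is an assumption):

    #include <arm_neon.h>

    // (F1 - F2) + 2*(F3 - F4): the shape of the FZ[i][1] match at line 155.
    static inline float16x4_t winograd_row(float16x4_t f1, float16x4_t f2,
                                           float16x4_t f3, float16x4_t f4)
    {
        return vadd_f16(vsub_f16(f1, f2),
                        vmul_f16(vsub_f16(f3, f4), vdup_n_f16(2.0f)));
    }

    // Bias add, then clamp to [output_min, output_max] as at line 195.
    static inline float16x4_t bias_and_clamp(float16x4_t f, float16x4_t b,
                                             float16_t output_min,
                                             float16_t output_max)
    {
        return vmax_f16(vmin_f16(vadd_f16(f, b), vdup_n_f16(output_max)),
                        vdup_n_f16(output_min));
    }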
/external/XNNPACK/src/f16-gavgpool-cw/
neonfp16arith-x4.c
48 vsum0 = vadd_f16(vsum0, vi0); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4()
49 vsum1 = vadd_f16(vsum1, vi1); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4()
50 vsum2 = vadd_f16(vsum2, vi2); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4()
51 vsum3 = vadd_f16(vsum3, vi3); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4()
66 vsum0 = vadd_f16(vsum0, vi0); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4()
67 vsum1 = vadd_f16(vsum1, vi1); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4()
68 vsum2 = vadd_f16(vsum2, vi2); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4()
69 vsum3 = vadd_f16(vsum3, vi3); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4()
98 vsum0 = vadd_f16(vsum0, vi0); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4()
107 vsum0 = vadd_f16(vsum0, vi0); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4()
neonfp16arith-x8.c
81 const float16x4_t vsum0_lo = vadd_f16(vget_low_f16(vsum0), vget_high_f16(vsum0)); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8()
82 const float16x4_t vsum1_lo = vadd_f16(vget_low_f16(vsum1), vget_high_f16(vsum1)); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8()
83 const float16x4_t vsum2_lo = vadd_f16(vget_low_f16(vsum2), vget_high_f16(vsum2)); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8()
84 const float16x4_t vsum3_lo = vadd_f16(vget_low_f16(vsum3), vget_high_f16(vsum3)); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8()
122 float16x4_t vsum = vadd_f16(vget_low_f16(vsum0), vget_high_f16(vsum0)); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8()
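Both gavgpool kernels finish by folding a float16x8_t accumulator in half with vadd_f16 before reducing to a scalar. A sketch of that horizontal sum (hypothetical helper, assuming +fp16; XNNPACK's own final reduction and tail handling differ):

    #include <arm_neon.h>

    // Fold 8 f16 lanes to 4 with vadd_f16, then pairwise-add down to one.
    static inline float16_t hsum_f16x8(float16x8_t v)
    {
        float16x4_t lo = vadd_f16(vget_low_f16(v), vget_high_f16(v));
        lo = vpadd_f16(lo, lo);  // 4 lanes -> 2 partial sums (duplicated)
        lo = vpadd_f16(lo, lo);  // 2 partial sums -> 1
        return vget_lane_f16(lo, 0);
    }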
/external/ComputeLibrary/src/core/NEON/kernels/convolution/winograd/input_transforms/
a64_fp16_6x6.cpp
181 …XTx[0][j] = vsub_f16(vadd_f16(x[4][j], vmul_f16(x[0][j], vdup_n_f16(4.0f))), vmul_f16(x[2][j], vdu… in a64_fp16_6x6()
184 …XTx[1][j] = vsub_f16(vadd_f16(x[3][j], x[4][j]), vmul_f16(vadd_f16(x[1][j], x[2][j]), vdup_n_f16(… in a64_fp16_6x6()
187 …XTx[2][j] = vadd_f16(vsub_f16(x[4][j], x[3][j]), vmul_f16(vsub_f16(x[1][j], x[2][j]), vdup_n_f16(4… in a64_fp16_6x6()
190 …XTx[3][j] = vadd_f16(vsub_f16(x[4][j], x[2][j]), vmul_f16(vsub_f16(x[3][j], x[1][j]), vdup_n_f16(2… in a64_fp16_6x6()
193 …XTx[4][j] = vadd_f16(vsub_f16(x[4][j], x[2][j]), vmul_f16(vsub_f16(x[1][j], x[3][j]), vdup_n_f16(2… in a64_fp16_6x6()
196 …XTx[5][j] = vsub_f16(vadd_f16(x[5][j], vmul_f16(x[1][j], vdup_n_f16(4.0f))), vmul_f16(x[3][j], vdu… in a64_fp16_6x6()
203 …U[i][0] = vsub_f16(vadd_f16(XTx[i][4], vmul_f16(XTx[i][0], vdup_n_f16(4.0f))), vmul_f16(XTx[i][2],… in a64_fp16_6x6()
206 …U[i][1] = vsub_f16(vadd_f16(XTx[i][3], XTx[i][4]), vmul_f16(vadd_f16(XTx[i][1], XTx[i][2]), vdup_n… in a64_fp16_6x6()
209 …U[i][2] = vadd_f16(vsub_f16(XTx[i][4], XTx[i][3]), vmul_f16(vsub_f16(XTx[i][1], XTx[i][2]), vdup_n… in a64_fp16_6x6()
212 …U[i][3] = vadd_f16(vsub_f16(XTx[i][4], XTx[i][2]), vmul_f16(vsub_f16(XTx[i][3], XTx[i][1]), vdup_n… in a64_fp16_6x6()
[all …]
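The 6x6 input transform uses the same fused shape, e.g. (x4 + 4*x0) - c*x2 for the first row. The second scale constant is truncated in the match above, so it stays a parameter in this sketch (hypothetical helper, assuming +fp16):

    #include <arm_neon.h>

    // (x4 + c0*x0) - c2*x2: the vsub(vadd(..., vmul(...)), vmul(...)) shape
    // of the XTx[0][j] match at line 181. c2 is unknown here (truncated).
    static inline float16x4_t input_row(float16x4_t x0, float16x4_t x2,
                                        float16x4_t x4,
                                        float16_t c0, float16_t c2)
    {
        return vsub_f16(vadd_f16(x4, vmul_f16(x0, vdup_n_f16(c0))),
                        vmul_f16(x2, vdup_n_f16(c2)));
    }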
/external/ComputeLibrary/src/core/NEON/kernels/convolution/winograd/weight_transforms/
a64_fp16_4x4_3x3.cpp
137 Ww[1][j] = vmul_n_f16(vadd_f16(vadd_f16(w[0][j], w[1][j]), w[2][j]), -4.0); in a64_fp16_4x4_3x3()
143 …Ww[3][j] = vadd_f16(vadd_f16(w[0][j], vmul_f16(w[1][j], vdup_n_f16(2.0f))), vmul_f16(w[2][j], vdup… in a64_fp16_4x4_3x3()
146 …Ww[4][j] = vadd_f16(vsub_f16(w[0][j], vmul_f16(w[1][j], vdup_n_f16(2.0f))), vmul_f16(w[2][j], vdup… in a64_fp16_4x4_3x3()
161 …V[i][1] = vmul_n_f16(vmul_n_f16(vadd_f16(vadd_f16(Ww[i][0], Ww[i][1]), Ww[i][2]), -4.0), recip576); in a64_fp16_4x4_3x3()
167 …V[i][3] = vmul_n_f16(vadd_f16(vadd_f16(Ww[i][0], vmul_f16(Ww[i][1], vdup_n_f16(2.0f))), vmul_f16(W… in a64_fp16_4x4_3x3()
170 …V[i][4] = vmul_n_f16(vadd_f16(vsub_f16(Ww[i][0], vmul_f16(Ww[i][1], vdup_n_f16(2.0f))), vmul_f16(W… in a64_fp16_4x4_3x3()
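The weight transform scales an entire row sum by a scalar with vmul_n_f16 rather than broadcasting first. A sketch of the Ww[1][j] match at line 137 (hypothetical helper, assuming +fp16):

    #include <arm_neon.h>

    // -4 * (w0 + w1 + w2), as in the Ww[1][j] match at line 137.
    static inline float16x4_t row_sum_scaled(float16x4_t w0, float16x4_t w1,
                                             float16x4_t w2)
    {
        return vmul_n_f16(vadd_f16(vadd_f16(w0, w1), w2), (float16_t) -4.0f);
    }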
/external/XNNPACK/src/f16-dwconv2d-chw/gen/
5x5p2-minmax-neonfp16arith-1x4-acc5.c
168 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5()
169 vo0p2 = vadd_f16(vo0p2, vo0p3); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5()
170 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5()
171 vo0p0 = vadd_f16(vo0p0, vo0p4); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5()
279 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5()
280 vo0p2 = vadd_f16(vo0p2, vo0p3); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5()
281 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5()
282 vo0p0 = vadd_f16(vo0p0, vo0p4); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5()
378 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5()
379 vo0p2 = vadd_f16(vo0p2, vo0p3); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5()
[all …]
3x3s2p1-minmax-neonfp16arith-1x4-acc4.c
98 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
99 vo0p2 = vadd_f16(vo0p2, vo0p3); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
100 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
146 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
147 vo0p2 = vadd_f16(vo0p2, vo0p3); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
148 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
5x5s2p2-minmax-neonfp16arith-1x4-acc5.c
181 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5()
182 vo0p2 = vadd_f16(vo0p2, vo0p3); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5()
183 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5()
184 vo0p0 = vadd_f16(vo0p0, vo0p4); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5()
279 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5()
280 vo0p2 = vadd_f16(vo0p2, vo0p3); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5()
281 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5()
282 vo0p0 = vadd_f16(vo0p0, vo0p4); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5()
5x5p2-minmax-neonfp16arith-1x4-acc4.c
168 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4()
169 vo0p2 = vadd_f16(vo0p2, vo0p3); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4()
170 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4()
278 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4()
279 vo0p2 = vadd_f16(vo0p2, vo0p3); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4()
280 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4()
376 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4()
377 vo0p2 = vadd_f16(vo0p2, vo0p3); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4()
378 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4()
5x5p2-minmax-neonfp16arith-2x4-acc3.c
209 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3()
210 vo1p0 = vadd_f16(vo1p0, vo1p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3()
211 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3()
212 vo1p0 = vadd_f16(vo1p0, vo1p2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3()
357 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3()
358 vo1p0 = vadd_f16(vo1p0, vo1p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3()
359 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3()
360 vo1p0 = vadd_f16(vo1p0, vo1p2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3()
490 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3()
491 vo1p0 = vadd_f16(vo1p0, vo1p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3()
[all …]
3x3s2p1-minmax-neonfp16arith-1x4-acc3.c
98 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3()
99 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3()
145 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3()
146 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3()
5x5s2p2-minmax-neonfp16arith-1x4-acc4.c
181 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4()
182 vo0p2 = vadd_f16(vo0p2, vo0p3); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4()
183 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4()
278 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4()
279 vo0p2 = vadd_f16(vo0p2, vo0p3); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4()
280 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4()
5x5s2p2-minmax-neonfp16arith-2x4-acc3.c
238 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3()
239 vo1p0 = vadd_f16(vo1p0, vo1p1); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3()
240 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3()
241 vo1p0 = vadd_f16(vo1p0, vo1p2); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3()
375 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3()
376 vo1p0 = vadd_f16(vo1p0, vo1p1); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3()
377 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3()
378 vo1p0 = vadd_f16(vo1p0, vo1p2); in xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3()
5x5p2-minmax-neonfp16arith-1x4-acc3.c
168 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc3()
169 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc3()
277 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc3()
278 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc3()
374 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc3()
375 vo0p0 = vadd_f16(vo0p0, vo0p2); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc3()
3x3s2p1-minmax-neonfp16arith-2x4-acc2.c
127 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2()
128 vo1p0 = vadd_f16(vo1p0, vo1p1); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2()
195 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2()
196 vo1p0 = vadd_f16(vo1p0, vo1p1); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2()
5x5p2-minmax-neonfp16arith-4x4-acc2.c
291 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2()
292 vo1p0 = vadd_f16(vo1p0, vo1p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2()
293 vo2p0 = vadd_f16(vo2p0, vo2p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2()
294 vo3p0 = vadd_f16(vo3p0, vo3p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2()
513 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2()
514 vo1p0 = vadd_f16(vo1p0, vo1p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2()
515 vo2p0 = vadd_f16(vo2p0, vo2p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2()
516 vo3p0 = vadd_f16(vo3p0, vo3p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2()
714 vo0p0 = vadd_f16(vo0p0, vo0p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2()
715 vo1p0 = vadd_f16(vo1p0, vo1p1); in xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2()
[all …]
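The _accN suffix on these dwconv2d-chw kernels appears to count partial accumulators, and the matches are the final fold, which pairs independent vadd_f16 ops to shorten the dependency chain. A sketch of the acc5 fold (hypothetical helper, assuming +fp16):

    #include <arm_neon.h>

    // Fold five partial sums as at lines 168-171: the first two adds are
    // independent, so they can issue in parallel before the final chain.
    static inline float16x4_t fold_acc5(float16x4_t p0, float16x4_t p1,
                                        float16x4_t p2, float16x4_t p3,
                                        float16x4_t p4)
    {
        p0 = vadd_f16(p0, p1);
        p2 = vadd_f16(p2, p3);
        p0 = vadd_f16(p0, p2);
        return vadd_f16(p0, p4);
    }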
/external/XNNPACK/src/f16-raddstoreexpminusmax/gen/
neonfp16arith-rr2-p2-x32.c
135 float16x4_t vacc_lo = vadd_f16(vget_low_f16(vacc), vget_high_f16(vacc)); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
160 vacc_lo = vadd_f16(vacc_lo, vf_lo); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
165 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 32))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
170 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 48))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
neonfp16arith-rr2-p2-x32-acc2.c
137 float16x4_t vacc_lo = vadd_f16(vget_low_f16(vacc), vget_high_f16(vacc)); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc2()
162 vacc_lo = vadd_f16(vacc_lo, vf_lo); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc2()
167 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 32))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc2()
172 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 48))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc2()
neonfp16arith-rr2-p2-x40.c
149 float16x4_t vacc_lo = vadd_f16(vget_low_f16(vacc), vget_high_f16(vacc)); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
174 vacc_lo = vadd_f16(vacc_lo, vf_lo); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
179 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 32))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
184 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 48))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
neonfp16arith-rr2-p2-x40-acc2.c
151 float16x4_t vacc_lo = vadd_f16(vget_low_f16(vacc), vget_high_f16(vacc)); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
176 vacc_lo = vadd_f16(vacc_lo, vf_lo); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
181 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 32))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
186 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 48))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
neonfp16arith-rr2-p2-x32-acc4.c
141 float16x4_t vacc_lo = vadd_f16(vget_low_f16(vacc), vget_high_f16(vacc)); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc4()
166 vacc_lo = vadd_f16(vacc_lo, vf_lo); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc4()
171 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 32))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc4()
176 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 48))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc4()
neonfp16arith-rr2-p2-x40-acc5.c
157 float16x4_t vacc_lo = vadd_f16(vget_low_f16(vacc), vget_high_f16(vacc)); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
182 vacc_lo = vadd_f16(vacc_lo, vf_lo); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
187 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 32))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
192 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 48))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
neonfp16arith-rr2-p2-x48.c
163 float16x4_t vacc_lo = vadd_f16(vget_low_f16(vacc), vget_high_f16(vacc)); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
188 vacc_lo = vadd_f16(vacc_lo, vf_lo); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
193 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 32))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
198 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 48))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
neonfp16arith-rr2-p2-x48-acc3.c
167 float16x4_t vacc_lo = vadd_f16(vget_low_f16(vacc), vget_high_f16(vacc)); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc3()
192 vacc_lo = vadd_f16(vacc_lo, vf_lo); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc3()
197 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 32))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc3()
202 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 48))); in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc3()
/external/XNNPACK/src/f16-raddstoreexpminusmax/
neonfp16arith-rr2-p2.c.in
117 float16x4_t vacc_lo = vadd_f16(vget_low_f16(vacc), vget_high_f16(vacc));
142 vacc_lo = vadd_f16(vacc_lo, vf_lo);
147 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 32)));
152 … vacc_lo = vadd_f16(vacc_lo, vreinterpret_f16_u64(vshl_n_u64(vreinterpret_u64_f16(vf_lo), 48)));
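In these softmax-sum kernels the tail adds reinterpret the 4-lane f16 vector as a single u64 lane and shift left by 32 or 48 bits before adding. On my reading, that zeroes out the lanes past the end of the row so only the valid leftover elements reach the accumulator (shift by 48 keeps a single lane; lane position is irrelevant because vacc_lo is reduced horizontally afterwards). A sketch of the two-element case (hypothetical helper, assuming +fp16):

    #include <arm_neon.h>

    // Add only lanes 0-1 of vf to the accumulator: shifting the u64 view
    // left by 32 drops lanes 2-3 and zero-fills lanes 0-1, so the two
    // valid values land in lanes 2-3 and the junk lanes contribute 0.
    static inline float16x4_t add_tail2(float16x4_t vacc, float16x4_t vf)
    {
        uint64x1_t bits = vshl_n_u64(vreinterpret_u64_f16(vf), 32);
        return vadd_f16(vacc, vreinterpret_f16_u64(bits));
    }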
