
Searched refs:vacc6x4567 (Results 1 – 14 of 14) sorted by relevance

/external/XNNPACK/src/qu8-gemm/
  8x8-minmax-neon.c  (all matches in xnn_qu8_gemm_minmax_ukernel_8x8__neon())
     95  int32x4_t vacc6x4567 = vacc0x4567;   (local definition)
    134  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa6), 0);
    154  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa6), 1);
    174  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa6), 2);
    194  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa6), 3);
    214  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa6), 0);
    234  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa6), 1);
    254  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa6), 2);
    274  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa6), 3);
    314  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa6), 0);
    [all …]
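
Note: the qu8 kernels above widen the uint8 operands to 16-bit values and accumulate into 32-bit lanes with vmlal_lane_s16. A minimal sketch of one such update for output row 6, columns 4..7, assuming an ARM NEON target; the standalone helper and its name are illustrative, not XNNPACK's own code:

    #include <arm_neon.h>

    /* One widening multiply-accumulate in the style of
     * xnn_qu8_gemm_minmax_ukernel_8x8__neon(): vxa6 holds eight widened
     * inputs of row 6, vxb01234567c0 holds eight widened weights for one
     * k step. The high half of the weights feeds output columns 4..7 and
     * the lane index broadcasts one input element of row 6. */
    static inline int32x4_t update_row6_cols4567(int32x4_t vacc6x4567,
                                                 int16x8_t vxa6,
                                                 int16x8_t vxb01234567c0) {
      return vmlal_lane_s16(vacc6x4567,
                            vget_high_s16(vxb01234567c0),
                            vget_low_s16(vxa6), 0);
    }
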
/external/XNNPACK/src/qu8-igemm/
  8x8-minmax-neon.c  (all matches in xnn_qu8_igemm_minmax_ukernel_8x8__neon())
     84  int32x4_t vacc6x4567 = vacc0x4567;   (local definition)
    160  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa6), 0);
    182  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa6), 1);
    204  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa6), 2);
    226  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa6), 3);
    248  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567), vget_high_s16(vxa6), 0);
    270  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567), vget_high_s16(vxa6), 1);
    292  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567), vget_high_s16(vxa6), 2);
    314  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567), vget_high_s16(vxa6), 3);
    356  vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa6), 0);
    [all …]
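
Note: in both the gemm and igemm variants the row-6 accumulators are declared as copies of the row-0 accumulators ("int32x4_t vacc6x4567 = vacc0x4567;"). A minimal sketch of that initialization, assuming (as in XNNPACK's packed-weight layout) that row 0 is loaded from per-output-channel bias values; the helper and parameter names are hypothetical:

    #include <arm_neon.h>
    #include <stdint.h>

    /* All eight row accumulators start from the same bias, so row 6 is
     * simply a copy of row 0. */
    static inline void init_row6_accumulators(const int32_t* packed_bias,
                                              int32x4_t* vacc6x0123,
                                              int32x4_t* vacc6x4567) {
      const int32x4_t vacc0x0123 = vld1q_s32(packed_bias);      /* bias, cols 0..3 */
      const int32x4_t vacc0x4567 = vld1q_s32(packed_bias + 4);  /* bias, cols 4..7 */
      *vacc6x0123 = vacc0x0123;
      *vacc6x4567 = vacc0x4567;
    }
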
/external/XNNPACK/src/f32-gemm/gen/
  8x8s4-minmax-neonfma.c  (all matches in xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma())
     98  float32x4_t vacc6x4567 = vacc0x4567;   (local definition)
    131  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567c0);
    160  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567c1);
    189  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567c2);
    218  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567c3);
    253  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567);
    274  vacc6x4567 = vminq_f32(vacc6x4567, vmax);
    292  vacc6x4567 = vmaxq_f32(vacc6x4567, vmin);
    300  vst1q_f32(c6 + 4, vacc6x4567);
    344  vacc6x0123 = vacc6x4567;
  8x8s4-minmax-neon.c  (all matches in xnn_f32_gemm_minmax_ukernel_8x8s4__neon())
     98  float32x4_t vacc6x4567 = vacc0x4567;   (local definition)
    131  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567c0);
    160  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567c1);
    189  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567c2);
    218  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567c3);
    253  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567);
    274  vacc6x4567 = vminq_f32(vacc6x4567, vmax);
    292  vacc6x4567 = vmaxq_f32(vacc6x4567, vmin);
    300  vst1q_f32(c6 + 4, vacc6x4567);
    344  vacc6x0123 = vacc6x4567;
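
Note: the two f32 8x8s4 kernels differ only in the multiply-accumulate intrinsic (vfmaq_f32 on NEONFMA builds, vmlaq_f32 on plain NEON); the clamp-and-store epilogue is identical. A minimal sketch of the last accumulate, the [vmin, vmax] clamp, and the store of row 6, columns 4..7; the wrapper function is illustrative only:

    #include <arm_neon.h>

    static inline void finish_row6_cols4567(float* c6,
                                            float32x4_t vacc6x4567,
                                            float32x4_t va6, float32x4_t vb4567,
                                            float32x4_t vmin, float32x4_t vmax) {
    #if defined(__aarch64__) || defined(__ARM_FEATURE_FMA)
      vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567);  /* fused multiply-add */
    #else
      vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567);  /* multiply then add */
    #endif
      vacc6x4567 = vminq_f32(vacc6x4567, vmax);  /* clamp to the upper bound */
      vacc6x4567 = vmaxq_f32(vacc6x4567, vmin);  /* clamp to the lower bound */
      vst1q_f32(c6 + 4, vacc6x4567);             /* columns 4..7 of output row 6 */
    }
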
/external/XNNPACK/src/f32-gemm/gen-inc/
  8x8s4inc-minmax-neon.c  (all matches in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon())
    100  float32x4_t vacc6x4567 = vld1q_f32(acc); acc += 4;   (local definition)
    133  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567c0);
    162  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567c1);
    191  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567c2);
    220  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567c3);
    255  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567);
    276  vacc6x4567 = vminq_f32(vacc6x4567, vmax);
    294  vacc6x4567 = vmaxq_f32(vacc6x4567, vmin);
    302  vst1q_f32(c6 + 4, vacc6x4567);
    346  vacc6x0123 = vacc6x4567;
  8x8s4inc-minmax-neonfma.c  (all matches in xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma())
    100  float32x4_t vacc6x4567 = vld1q_f32(acc); acc += 4;   (local definition)
    133  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567c0);
    162  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567c1);
    191  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567c2);
    220  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567c3);
    255  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567);
    276  vacc6x4567 = vminq_f32(vacc6x4567, vmax);
    294  vacc6x4567 = vmaxq_f32(vacc6x4567, vmin);
    302  vst1q_f32(c6 + 4, vacc6x4567);
    346  vacc6x0123 = vacc6x4567;
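
Note: the gen-inc ("gemminc") variants do not start the accumulators from bias. Each one is re-loaded from a caller-provided accumulation buffer ("vacc6x4567 = vld1q_f32(acc); acc += 4;"), so a previous partial pass over K can be continued. A minimal sketch of that load; the helper is illustrative:

    #include <arm_neon.h>

    /* Load the next four partial sums and advance the buffer pointer. */
    static inline float32x4_t load_acc4(const float** acc) {
      const float32x4_t vacc = vld1q_f32(*acc);
      *acc += 4;
      return vacc;
    }
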
/external/XNNPACK/src/f32-igemm/gen/
  8x8s4-minmax-neonfma.c  (all matches in xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma())
     87  float32x4_t vacc6x4567 = vacc0x4567;   (local definition)
    164  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567c0);
    193  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567c1);
    222  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567c2);
    251  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567c3);
    286  vacc6x4567 = vfmaq_f32(vacc6x4567, va6, vb4567);
    311  vacc6x4567 = vminq_f32(vacc6x4567, vmax);
    329  vacc6x4567 = vmaxq_f32(vacc6x4567, vmin);
    337  vst1q_f32(c6 + 4, vacc6x4567);
    372  vacc6x0123 = vacc6x4567;
  8x8s4-minmax-neon.c  (all matches in xnn_f32_igemm_minmax_ukernel_8x8s4__neon())
     87  float32x4_t vacc6x4567 = vacc0x4567;   (local definition)
    164  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567c0);
    193  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567c1);
    222  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567c2);
    251  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567c3);
    286  vacc6x4567 = vmlaq_f32(vacc6x4567, va6, vb4567);
    311  vacc6x4567 = vminq_f32(vacc6x4567, vmax);
    329  vacc6x4567 = vmaxq_f32(vacc6x4567, vmin);
    337  vst1q_f32(c6 + 4, vacc6x4567);
    372  vacc6x0123 = vacc6x4567;
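
Note: the trailing "vacc6x0123 = vacc6x4567;" matches sit in the column-remainder path: when fewer than 8 output columns remain, the low accumulator is stored first and the high accumulator is slid into its place so the narrower tails can keep working on a single register. A sketch of that pattern, assuming the usual bit-tests on the remaining column count nc; this illustrates the idea rather than reproducing the kernel's exact tail:

    #include <arm_neon.h>
    #include <stddef.h>

    static inline void store_row6_tail(float* c6, size_t nc,
                                       float32x4_t vacc6x0123,
                                       float32x4_t vacc6x4567) {
      if (nc & 4) {
        vst1q_f32(c6, vacc6x0123); c6 += 4;
        vacc6x0123 = vacc6x4567;  /* high half becomes the working register */
      }
      float32x2_t vacc6x01 = vget_low_f32(vacc6x0123);
      if (nc & 2) {
        vst1_f32(c6, vacc6x01); c6 += 2;
        vacc6x01 = vget_high_f32(vacc6x0123);
      }
      if (nc & 1) {
        vst1_lane_f32(c6, vacc6x01, 0);
      }
    }
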
/external/XNNPACK/src/f32-ppmm/gen/
  8x8-minmax-neonfma.c  (all matches in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma())
     79  float32x4_t vacc6x4567 = vacc0x4567;   (local definition)
    106  vacc6x4567 = vfmaq_laneq_f32(vacc6x4567, vb4567, va4567, 2);
    132  vacc6x4567 = vfmaq_f32(vacc6x4567, va6666, vb4567);
    154  vacc6x4567 = vminq_f32(vacc6x4567, vmax);
    172  vacc6x4567 = vmaxq_f32(vacc6x4567, vmin);
    180  vst1q_f32(c6 + 4, vacc6x4567);
    216  vacc6x0123 = vacc6x4567;
  8x8-minmax-neon.c  (all matches in xnn_f32_ppmm_minmax_ukernel_8x8__neon())
     79  float32x4_t vacc6x4567 = vacc0x4567;   (local definition)
    105  vacc6x4567 = vmlaq_lane_f32(vacc6x4567, vb4567, vget_high_f32(va4567), 0);
    126  vacc6x4567 = vminq_f32(vacc6x4567, vmax);
    144  vacc6x4567 = vmaxq_f32(vacc6x4567, vmin);
    152  vst1q_f32(c6 + 4, vacc6x4567);
    188  vacc6x0123 = vacc6x4567;
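
Note: the ppmm kernels read packed A values four at a time and broadcast one lane per multiply. The NEONFMA build uses vfmaq_laneq_f32 with a lane index into the full 128-bit register, while the plain NEON build splits the register and uses vmlaq_lane_f32 on the high 64-bit half. A minimal sketch of the row-6 update; the wrapper and the __aarch64__ guard are illustrative assumptions:

    #include <arm_neon.h>

    static inline float32x4_t ppmm_update_row6(float32x4_t vacc6x4567,
                                               float32x4_t vb4567,
                                               float32x4_t va4567) {
    #if defined(__aarch64__)
      /* lane 2 of va4567 holds row 6's A value */
      return vfmaq_laneq_f32(vacc6x4567, vb4567, va4567, 2);
    #else
      /* same value, addressed as lane 0 of the high half */
      return vmlaq_lane_f32(vacc6x4567, vb4567, vget_high_f32(va4567), 0);
    #endif
    }
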
/external/XNNPACK/src/qs8-igemm/gen/
  8x8c4-minmax-neondot.c  (all matches in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot())
     88  int32x4_t vacc6x4567 = vacc0x4567;   (local definition)
    162  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    178  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb4567x4567, va6x01234567, 1);
    214  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    235  vacc6x4567 = vqrdmulhq_s32(vacc6x4567, vmultiplier);
    254  vacc6x4567 = vsraq_n_s32(vacc6x4567, vbicq_s32(vacc6x4567, vzero_shift_mask), 31);
    271  vacc6x4567 = vrshlq_s32(vacc6x4567, vright_shift);
    283  …const int16x8_t vacc6x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc6x0123), vacc6x4567), v…
    297  …x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc6x0123), vqmovn_s32(vacc6x4567)), voutput_zero_…
  8x16c4-minmax-neondot.c  (all matches in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot())
    100  int32x4_t vacc6x4567 = vacc0x4567;   (local definition)
    194  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    226  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb4567x4567, va6x01234567, 1);
    280  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    317  vacc6x4567 = vqrdmulhq_s32(vacc6x4567, vmultiplier);
    352  vacc6x4567 = vsraq_n_s32(vacc6x4567, vbicq_s32(vacc6x4567, vzero_shift_mask), 31);
    385  vacc6x4567 = vrshlq_s32(vacc6x4567, vright_shift);
    407  …const int16x8_t vacc6x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc6x0123), vacc6x4567), v…
    433  …x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc6x0123), vqmovn_s32(vacc6x4567)), voutput_zero_…
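
Note: the neondot kernels rely on the ARMv8.2 dot-product extension: vdotq_lane_s32 multiplies four int8 pairs per lane and adds the four products into one int32 lane, so a single instruction covers four k steps. A minimal sketch of one row-6 update, assuming a build with the dotprod feature enabled; the helper is illustrative:

    #include <arm_neon.h>

    #if defined(__ARM_FEATURE_DOTPROD)
    /* va6x01234567: eight int8 inputs of row 6 (two groups of four k values);
     * vb0123x4567:  packed int8 weights of columns 4..7 for k = 0..3.
     * Lane 0 selects the first group of four inputs. */
    static inline int32x4_t dot_update_row6_cols4567(int32x4_t vacc6x4567,
                                                     int8x16_t vb0123x4567,
                                                     int8x8_t va6x01234567) {
      return vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    }
    #endif  /* __ARM_FEATURE_DOTPROD */
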
/external/XNNPACK/src/qs8-gemm/gen/
  8x8c4-minmax-neondot.c  (all matches in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot())
    101  int32x4_t vacc6x4567 = vacc0x4567;   (local definition)
    139  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    155  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb4567x4567, va6x01234567, 1);
    191  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    213  const int32x4_t vproduct6x4567 = vqrdmulhq_n_s32(vacc6x4567, params->neon.multiplier);
    230  vacc6x4567 = vsraq_n_s32(vproduct6x4567, vbicq_s32(vacc6x4567, vzero_shift_mask), 31);
    247  vacc6x4567 = vrshlq_s32(vacc6x4567, vright_shift);
    259  …const int16x8_t vacc6x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc6x0123), vacc6x4567), v…
    273  …x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc6x0123), vqmovn_s32(vacc6x4567)), voutput_zero_…
  8x16c4-minmax-neondot.c  (all matches in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot())
    113  int32x4_t vacc6x4567 = vacc0x4567;   (local definition)
    171  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    203  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb4567x4567, va6x01234567, 1);
    257  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    295  const int32x4_t vproduct6x4567 = vqrdmulhq_n_s32(vacc6x4567, params->neon.multiplier);
    328  vacc6x4567 = vsraq_n_s32(vproduct6x4567, vbicq_s32(vacc6x4567, vzero_shift_mask), 31);
    361  vacc6x4567 = vrshlq_s32(vacc6x4567, vright_shift);
    383  …const int16x8_t vacc6x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc6x0123), vacc6x4567), v…
    409  …x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc6x0123), vqmovn_s32(vacc6x4567)), voutput_zero_…
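
Note: after the dot-product loop, each int32 accumulator is requantized before narrowing: a saturating rounding doubling multiply by the fixed-point multiplier, a sign-based rounding correction folded in with vsraq_n_s32, a rounding shift, then saturating narrowing with the output zero point added. A minimal sketch of that sequence for one accumulator, with stand-in parameter names; it follows the order of operations visible above rather than reproducing the kernel verbatim:

    #include <arm_neon.h>
    #include <stdint.h>

    static inline int16x4_t requantize_row6_cols4567(int32x4_t vacc6x4567,
                                                     int32_t multiplier,
                                                     int32x4_t vzero_shift_mask,
                                                     int32x4_t vright_shift,
                                                     int16x4_t voutput_zero_point) {
      /* Saturating rounding doubling multiply, keeping the high 32 bits. */
      const int32x4_t vproduct6x4567 = vqrdmulhq_n_s32(vacc6x4567, multiplier);
      /* Add (vacc & ~mask) >> 31: -1 for negative accumulators unless the
       * shift amount is zero, adjusting the rounding of the shift below. */
      int32x4_t vacc = vsraq_n_s32(vproduct6x4567,
                                   vbicq_s32(vacc6x4567, vzero_shift_mask), 31);
      /* vright_shift holds negative values, so this is a rounding right shift. */
      vacc = vrshlq_s32(vacc, vright_shift);
      /* Narrow to int16 with saturation and add the output zero point. */
      return vqadd_s16(vqmovn_s32(vacc), voutput_zero_point);
    }
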