Searched refs:vout2x0123 (Results 1 – 15 of 15) sorted by relevance

/external/XNNPACK/src/bf16-gemm/gen/
3x4c8-minmax-neonbf16-bfdot.c
176 bfloat16x4_t vout2x0123 = vcvt_bf16_f32(vacc2x0123); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot() local
183 vst1_bf16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
195 vst1_lane_u32((void*) c2, vreinterpret_u32_bf16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
199 vout2x0123 = vreinterpret_bf16_u16(vext_u16(vreinterpret_u16_bf16(vout2x0123), vreinterpret_u16_bf… in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
204 vst1_lane_bf16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
3x4c8-minmax-neonbf16-bfmlal.c
201 bfloat16x4_t vout2x0123 = vcvt_bf16_f32(vacc2x0123); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal() local
208 vst1_bf16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
220 vst1_lane_u32((void*) c2, vreinterpret_u32_bf16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
224 vout2x0123 = vreinterpret_bf16_u16(vext_u16(vreinterpret_u16_bf16(vout2x0123), vreinterpret_u16_bf… in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
229 vst1_lane_bf16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
4x8c2-minmax-neonbf16-bfdot-lane-ld128.c
270 bfloat16x4_t vout2x0123 = vcvt_bf16_f32(vacc2x0123); in xnn_bf16_gemm_minmax_ukernel_4x8c2__neonbf16_bfdot_lane_ld128() local
284 vst1_bf16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_4x8c2__neonbf16_bfdot_lane_ld128()
301 vst1_bf16(c2, vout2x0123); c2 += 4; in xnn_bf16_gemm_minmax_ukernel_4x8c2__neonbf16_bfdot_lane_ld128()
306 vout2x0123 = vout2x4567; in xnn_bf16_gemm_minmax_ukernel_4x8c2__neonbf16_bfdot_lane_ld128()
312 vst1_lane_u32((void*) c2, vreinterpret_u32_bf16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_4x8c2__neonbf16_bfdot_lane_ld128()
317 vout2x0123 = vreinterpret_bf16_u16(vext_u16(vreinterpret_u16_bf16(vout2x0123), vreinterpret_u16_bf… in xnn_bf16_gemm_minmax_ukernel_4x8c2__neonbf16_bfdot_lane_ld128()
323 vst1_lane_bf16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_4x8c2__neonbf16_bfdot_lane_ld128()
4x4c8-minmax-neonbf16-bfdot.c
210 bfloat16x4_t vout2x0123 = vcvt_bf16_f32(vacc2x0123); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfdot() local
218 vst1_bf16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfdot()
233 vst1_lane_u32((void*) c2, vreinterpret_u32_bf16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfdot()
238 vout2x0123 = vreinterpret_bf16_u16(vext_u16(vreinterpret_u16_bf16(vout2x0123), vreinterpret_u16_bf… in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfdot()
244 vst1_lane_bf16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfdot()
3x4c8-minmax-neonfma-shland.c
258 uint16x4_t vout2x0123 = vshrn_n_u32(vreinterpretq_u32_f32(vacc2x0123), 16); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() local
265 vst1_u16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
277 vst1_lane_u32((void*) c2, vreinterpret_u32_u16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
281 vout2x0123 = vext_u16(vout2x0123, vout2x0123, 2); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
286 vst1_lane_u16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
3x4c8-minmax-neonfma-zip.c
258 uint16x4_t vout2x0123 = vshrn_n_u32(vreinterpretq_u32_f32(vacc2x0123), 16); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() local
265 vst1_u16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
277 vst1_lane_u32((void*) c2, vreinterpret_u32_u16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
281 vout2x0123 = vext_u16(vout2x0123, vout2x0123, 2); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
286 vst1_lane_u16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
4x4c8-minmax-neonbf16-bfmlal.c
243 bfloat16x4_t vout2x0123 = vcvt_bf16_f32(vacc2x0123); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal() local
251 vst1_bf16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
266 vst1_lane_u32((void*) c2, vreinterpret_u32_bf16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
271 vout2x0123 = vreinterpret_bf16_u16(vext_u16(vreinterpret_u16_bf16(vout2x0123), vreinterpret_u16_bf… in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
277 vst1_lane_bf16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
5x4c8-minmax-neonbf16-bfdot.c
244 bfloat16x4_t vout2x0123 = vcvt_bf16_f32(vacc2x0123); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfdot() local
253 vst1_bf16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfdot()
271 vst1_lane_u32((void*) c2, vreinterpret_u32_bf16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfdot()
277 vout2x0123 = vreinterpret_bf16_u16(vext_u16(vreinterpret_u16_bf16(vout2x0123), vreinterpret_u16_bf… in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfdot()
284 vst1_lane_bf16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfdot()
5x8c2-minmax-neonbf16-bfdot-lane-ld128.c
312 bfloat16x4_t vout2x0123 = vcvt_bf16_f32(vacc2x0123); in xnn_bf16_gemm_minmax_ukernel_5x8c2__neonbf16_bfdot_lane_ld128() local
328 vst1_bf16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_5x8c2__neonbf16_bfdot_lane_ld128()
349 vst1_bf16(c2, vout2x0123); c2 += 4; in xnn_bf16_gemm_minmax_ukernel_5x8c2__neonbf16_bfdot_lane_ld128()
355 vout2x0123 = vout2x4567; in xnn_bf16_gemm_minmax_ukernel_5x8c2__neonbf16_bfdot_lane_ld128()
362 vst1_lane_u32((void*) c2, vreinterpret_u32_bf16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_5x8c2__neonbf16_bfdot_lane_ld128()
368 vout2x0123 = vreinterpret_bf16_u16(vext_u16(vreinterpret_u16_bf16(vout2x0123), vreinterpret_u16_bf… in xnn_bf16_gemm_minmax_ukernel_5x8c2__neonbf16_bfdot_lane_ld128()
375 vst1_lane_bf16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_5x8c2__neonbf16_bfdot_lane_ld128()
6x8c2-minmax-neonbf16-bfdot-lane-ld128.c
354 bfloat16x4_t vout2x0123 = vcvt_bf16_f32(vacc2x0123); in xnn_bf16_gemm_minmax_ukernel_6x8c2__neonbf16_bfdot_lane_ld128() local
372 vst1_bf16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_6x8c2__neonbf16_bfdot_lane_ld128()
397 vst1_bf16(c2, vout2x0123); c2 += 4; in xnn_bf16_gemm_minmax_ukernel_6x8c2__neonbf16_bfdot_lane_ld128()
404 vout2x0123 = vout2x4567; in xnn_bf16_gemm_minmax_ukernel_6x8c2__neonbf16_bfdot_lane_ld128()
412 vst1_lane_u32((void*) c2, vreinterpret_u32_bf16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_6x8c2__neonbf16_bfdot_lane_ld128()
419 vout2x0123 = vreinterpret_bf16_u16(vext_u16(vreinterpret_u16_bf16(vout2x0123), vreinterpret_u16_bf… in xnn_bf16_gemm_minmax_ukernel_6x8c2__neonbf16_bfdot_lane_ld128()
427 vst1_lane_bf16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_6x8c2__neonbf16_bfdot_lane_ld128()
4x4c8-minmax-neonfma-zip.c
310 uint16x4_t vout2x0123 = vshrn_n_u32(vreinterpretq_u32_f32(vacc2x0123), 16); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip() local
318 vst1_u16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
333 vst1_lane_u32((void*) c2, vreinterpret_u32_u16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
338 vout2x0123 = vext_u16(vout2x0123, vout2x0123, 2); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
344 vst1_lane_u16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
4x4c8-minmax-neonfma-shland.c
310 uint16x4_t vout2x0123 = vshrn_n_u32(vreinterpretq_u32_f32(vacc2x0123), 16); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland() local
318 vst1_u16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
333 vst1_lane_u32((void*) c2, vreinterpret_u32_u16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
338 vout2x0123 = vext_u16(vout2x0123, vout2x0123, 2); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
344 vst1_lane_u16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
5x4c8-minmax-neonbf16-bfmlal.c
285 bfloat16x4_t vout2x0123 = vcvt_bf16_f32(vacc2x0123); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal() local
294 vst1_bf16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
312 vst1_lane_u32((void*) c2, vreinterpret_u32_bf16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
318 vout2x0123 = vreinterpret_bf16_u16(vext_u16(vreinterpret_u16_bf16(vout2x0123), vreinterpret_u16_bf… in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
325 vst1_lane_bf16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
5x4c8-minmax-neonfma-zip.c
362 uint16x4_t vout2x0123 = vshrn_n_u32(vreinterpretq_u32_f32(vacc2x0123), 16); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip() local
371 vst1_u16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
389 vst1_lane_u32((void*) c2, vreinterpret_u32_u16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
395 vout2x0123 = vext_u16(vout2x0123, vout2x0123, 2); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
402 vst1_lane_u16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
5x4c8-minmax-neonfma-shland.c
362 uint16x4_t vout2x0123 = vshrn_n_u32(vreinterpretq_u32_f32(vacc2x0123), 16); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland() local
371 vst1_u16(c2, vout2x0123); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
389 vst1_lane_u32((void*) c2, vreinterpret_u32_u16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
395 vout2x0123 = vext_u16(vout2x0123, vout2x0123, 2); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
402 vst1_lane_u16(c2, vout2x0123, 0); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
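
For context, every hit above sits in the store epilogue of an XNNPACK bf16 GEMM microkernel: the f32 accumulator vacc2x0123 is narrowed to a four-lane row vout2x0123 (vcvt_bf16_f32 in the neonbf16 kernels, a 16-bit narrowing shift of the f32 bit pattern via vshrn_n_u32 in the neonfma shland/zip kernels), stored whole while at least four columns remain, and otherwise written out with 32-bit and 16-bit lane stores. Below is a minimal sketch of that tail-store pattern for one row; it is not XNNPACK's actual interface, and the function name store_row2_tail plus passing the row pointer c2 and the remaining column count nc as standalone parameters are assumptions made for illustration.

    #include <arm_neon.h>  // assumes AArch64 with the BF16 extension (+bf16)

    // Illustrative sketch, not the kernel's real signature: write the last
    // 1-3 bf16 elements of a row, mirroring the nc & 2 / nc & 1 branches
    // visible in the search hits above.
    static void store_row2_tail(bfloat16_t* c2, bfloat16x4_t vout2x0123, size_t nc)
    {
      if (nc & 2) {
        // Store the two low bf16 lanes as a single 32-bit lane store, then
        // rotate the vector so the next unstored element lands in lane 0.
        vst1_lane_u32((void*) c2, vreinterpret_u32_bf16(vout2x0123), 0); c2 += 2;
        vout2x0123 = vreinterpret_bf16_u16(
            vext_u16(vreinterpret_u16_bf16(vout2x0123),
                     vreinterpret_u16_bf16(vout2x0123), 2));
      }
      if (nc & 1) {
        // Store the single remaining bf16 lane.
        vst1_lane_bf16(c2, vout2x0123, 0);
      }
    }

The neonfma variants in the listing reach the same memory layout without BF16 intrinsics: there vout2x0123 is a uint16x4_t produced by truncating the f32 bits, so the tail uses vst1_lane_u32, vext_u16, and vst1_lane_u16 directly and no bf16 reinterpret casts are needed.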