Home
last modified time | relevance | path

Searched refs:vsum1x2 (Results 1 – 16 of 16) sorted by relevance

/external/XNNPACK/src/bf16-gemm/gen/
D2x4c8-minmax-neonbf16-bfdot.c124 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfdot() local
129 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfdot()
D2x4c8-minmax-neonbf16-bfmlal.c141 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal() local
146 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal()
D3x4c8-minmax-neonbf16-bfdot.c153 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot() local
160 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
D2x4c8-minmax-neonfma-shland.c188 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland() local
193 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
D2x4c8-minmax-neonfma-zip.c188 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip() local
193 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
D3x4c8-minmax-neonbf16-bfmlal.c178 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal() local
185 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
D4x4c8-minmax-neonbf16-bfdot.c182 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfdot() local
191 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfdot()
D3x4c8-minmax-neonfma-zip.c235 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() local
242 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
D4x4c8-minmax-neonbf16-bfmlal.c215 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal() local
224 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
D5x4c8-minmax-neonbf16-bfdot.c211 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfdot() local
222 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfdot()
D3x4c8-minmax-neonfma-shland.c235 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() local
242 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
D5x4c8-minmax-neonbf16-bfmlal.c252 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal() local
263 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
D4x4c8-minmax-neonfma-shland.c282 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland() local
291 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
D4x4c8-minmax-neonfma-zip.c282 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip() local
291 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
D5x4c8-minmax-neonfma-zip.c329 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip() local
340 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
D5x4c8-minmax-neonfma-shland.c329 const float32x2_t vsum1x2 = vadd_f32(vget_low_f32(vacc1x2), vget_high_f32(vacc1x2)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland() local
340 float32x4_t vacc1x0123 = vcombine_f32(vpadd_f32(vsum1x0, vsum1x1), vpadd_f32(vsum1x2, vsum1x3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()