Home
last modified time | relevance | path

Searched refs:vsum0x1 (Results 1 – 20 of 20) sorted by relevance

/external/XNNPACK/src/bf16-gemm/gen/
D1x4c8-minmax-neonbf16-bfdot.c93 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfdot() local
97 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfdot()
D1x4c8-minmax-neonbf16-bfmlal.c102 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfmlal() local
106 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfmlal()
D1x4c8-minmax-neonfma-zip.c139 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip() local
143 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip()
D2x4c8-minmax-neonbf16-bfdot.c121 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfdot() local
128 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfdot()
D1x4c8-minmax-neonfma-shland.c139 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland() local
143 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland()
D2x4c8-minmax-neonbf16-bfmlal.c138 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal() local
145 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal()
D3x4c8-minmax-neonbf16-bfdot.c149 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot() local
159 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
D2x4c8-minmax-neonfma-shland.c185 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland() local
192 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
D2x4c8-minmax-neonfma-zip.c185 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip() local
192 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
D3x4c8-minmax-neonbf16-bfmlal.c174 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal() local
184 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
D4x4c8-minmax-neonbf16-bfdot.c177 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfdot() local
190 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfdot()
D3x4c8-minmax-neonfma-zip.c231 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() local
241 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
D4x4c8-minmax-neonbf16-bfmlal.c210 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal() local
223 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
D5x4c8-minmax-neonbf16-bfdot.c205 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfdot() local
221 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfdot()
D3x4c8-minmax-neonfma-shland.c231 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() local
241 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
D5x4c8-minmax-neonbf16-bfmlal.c246 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal() local
262 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
D4x4c8-minmax-neonfma-shland.c277 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland() local
290 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
D4x4c8-minmax-neonfma-zip.c277 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip() local
290 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
D5x4c8-minmax-neonfma-zip.c323 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip() local
339 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
D5x4c8-minmax-neonfma-shland.c323 const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland() local
339 float32x4_t vacc0x0123 = vcombine_f32(vpadd_f32(vsum0x0, vsum0x1), vpadd_f32(vsum0x2, vsum0x3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()