Searched refs:va1x3e (Results 1 – 8 of 8) sorted by relevance
/external/XNNPACK/src/bf16-gemm/gen/ |
D | 2x4c8-minmax-neonfma-shland.c | 139 … const float32x4_t va1x3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(va1x3), 16)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland() local 148 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
|
D | 2x4c8-minmax-neonfma-zip.c | 139 const float32x4_t va1x3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, va1x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip() local 148 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
|
D | 3x4c8-minmax-neonfma-shland.c | 168 … const float32x4_t va1x3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(va1x3), 16)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() local 181 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
|
D | 3x4c8-minmax-neonfma-zip.c | 168 const float32x4_t va1x3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, va1x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() local 181 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
|
D | 4x4c8-minmax-neonfma-zip.c | 197 const float32x4_t va1x3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, va1x3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip() local 214 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
|
D | 4x4c8-minmax-neonfma-shland.c | 197 … const float32x4_t va1x3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(va1x3), 16)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland() local 214 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
|
D | 5x4c8-minmax-neonfma-zip.c | 226 const float32x4_t va1x3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, va1x3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip() local 247 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
|
D | 5x4c8-minmax-neonfma-shland.c | 226 … const float32x4_t va1x3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(va1x3), 16)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland() local 247 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
|