Home
last modified time | relevance | path

Searched refs: va2x3 (Results 1 – 12 of 12) sorted by relevance

/external/XNNPACK/src/bf16-gemm/gen/
D3x4c8-minmax-neonbf16-bfmlal.c154 … const bfloat16x8_t va2x3 = vreinterpretq_bf16_u16(vbicq_u16(vreinterpretq_u16_bf16(va2), vm3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal() local
155 vacc2x3 = vbfmlalbq_f32(vacc2x3, va2x3, vb3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
156 vacc2x3 = vbfmlaltq_f32(vacc2x3, va2x3, vb3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
D3x4c8-minmax-neonfma-shland.c156 const uint16x8_t va2x3 = vbicq_u16(va2, vm3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() local
169 … const float32x4_t va2x3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(va2x3), 16)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
200 const float32x4_t va2x3o = vreinterpretq_f32_u16(vandq_u16(va2x3, vmask)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
D3x4c8-minmax-neonfma-zip.c156 const uint16x8_t va2x3 = vbicq_u16(va2, vm3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() local
169 const float32x4_t va2x3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, va2x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
200 const float32x4_t va2x3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, va2x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
D4x4c8-minmax-neonbf16-bfmlal.c183 … const bfloat16x8_t va2x3 = vreinterpretq_bf16_u16(vbicq_u16(vreinterpretq_u16_bf16(va2), vm3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal() local
184 vacc2x3 = vbfmlalbq_f32(vacc2x3, va2x3, vb3); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
185 vacc2x3 = vbfmlaltq_f32(vacc2x3, va2x3, vb3); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
D3x4c8-minmax-neonbf16-bfdot.c130 … const bfloat16x8_t va2x3 = vreinterpretq_bf16_u16(vbicq_u16(vreinterpretq_u16_bf16(va2), vm3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot() local
131 vacc2x3 = vbfdotq_f32(vacc2x3, va2x3, vb3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
D4x4c8-minmax-neonfma-zip.c181 const uint16x8_t va2x3 = vbicq_u16(va2, vm3); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip() local
198 const float32x4_t va2x3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, va2x3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
237 const float32x4_t va2x3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, va2x3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
D4x4c8-minmax-neonfma-shland.c181 const uint16x8_t va2x3 = vbicq_u16(va2, vm3); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland() local
198 … const float32x4_t va2x3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(va2x3), 16)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
237 const float32x4_t va2x3o = vreinterpretq_f32_u16(vandq_u16(va2x3, vmask)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
D5x4c8-minmax-neonbf16-bfmlal.c212 … const bfloat16x8_t va2x3 = vreinterpretq_bf16_u16(vbicq_u16(vreinterpretq_u16_bf16(va2), vm3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal() local
213 vacc2x3 = vbfmlalbq_f32(vacc2x3, va2x3, vb3); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
214 vacc2x3 = vbfmlaltq_f32(vacc2x3, va2x3, vb3); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
D4x4c8-minmax-neonbf16-bfdot.c152 … const bfloat16x8_t va2x3 = vreinterpretq_bf16_u16(vbicq_u16(vreinterpretq_u16_bf16(va2), vm3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfdot() local
153 vacc2x3 = vbfdotq_f32(vacc2x3, va2x3, vb3); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfdot()
D5x4c8-minmax-neonfma-zip.c206 const uint16x8_t va2x3 = vbicq_u16(va2, vm3); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip() local
227 const float32x4_t va2x3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, va2x3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
274 const float32x4_t va2x3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, va2x3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
D5x4c8-minmax-neonfma-shland.c206 const uint16x8_t va2x3 = vbicq_u16(va2, vm3); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland() local
227 … const float32x4_t va2x3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(va2x3), 16)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
274 const float32x4_t va2x3o = vreinterpretq_f32_u16(vandq_u16(va2x3, vmask)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
D5x4c8-minmax-neonbf16-bfdot.c174 … const bfloat16x8_t va2x3 = vreinterpretq_bf16_u16(vbicq_u16(vreinterpretq_u16_bf16(va2), vm3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfdot() local
175 vacc2x3 = vbfdotq_f32(vacc2x3, va2x3, vb3); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfdot()