
Search results for refs:vb3o (1–10 of 10), sorted by relevance

/external/XNNPACK/src/bf16-gemm/gen/
4x4c8-minmax-neonfma-zip.c (matches in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip):
  127:  const float32x4_t vb3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb3));  (local declaration)
  141:  vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o);
  142:  vacc1x3 = vfmaq_f32(vacc1x3, va1o, vb3o);
  143:  vacc2x3 = vfmaq_f32(vacc2x3, va2o, vb3o);
  144:  vacc3x3 = vfmaq_f32(vacc3x3, va3o, vb3o);
  221:  const float32x4_t vb3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb3));  (local declaration)
  252:  vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o);
  253:  vacc1x3 = vfmaq_f32(vacc1x3, va1x3o, vb3o);
  254:  vacc2x3 = vfmaq_f32(vacc2x3, va2x3o, vb3o);
  255:  vacc3x3 = vfmaq_f32(vacc3x3, va3x3o, vb3o);
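All ten hits are in generated bf16 GEMM microkernels, where vb3o holds one half of the bf16 B-vector vb3 widened to f32 so it can feed vfmaq_f32. The sketch below is not XNNPACK code (widen_lo, widen_hi, and bf16_dot8 are hypothetical helpers) but shows the widening trick the zip variants above rely on: interleaving with a zero vector places each 16-bit bf16 payload in the high half of a 32-bit lane, which is that value as an f32 with zeroed low mantissa bits. It assumes a little-endian AArch64 target, since vzip1q_u16/vzip2q_u16 and vaddvq_f32 are A64 intrinsics.

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

/* Eight bf16 values packed in one uint16x8_t. Zipping with zeros moves each
   16-bit payload into the high half of a 32-bit lane: bf16 widened to f32. */
static inline float32x4_t widen_lo(uint16x8_t v) {   /* elements 0..3 */
  return vreinterpretq_f32_u16(vzip1q_u16(vmovq_n_u16(0), v));
}
static inline float32x4_t widen_hi(uint16x8_t v) {   /* elements 4..7: the vb3o pattern */
  return vreinterpretq_f32_u16(vzip2q_u16(vmovq_n_u16(0), v));
}

/* Dot product of eight bf16 pairs, accumulated in f32 as in the kernels. */
static float bf16_dot8(const uint16_t a[8], const uint16_t b[8]) {
  const uint16x8_t va = vld1q_u16(a);
  const uint16x8_t vb = vld1q_u16(b);
  float32x4_t vacc = vmovq_n_f32(0.0f);
  vacc = vfmaq_f32(vacc, widen_lo(va), widen_lo(vb));
  vacc = vfmaq_f32(vacc, widen_hi(va), widen_hi(vb));  /* matches the vb3o FMAs */
  return vaddvq_f32(vacc);  /* horizontal reduce of the four partial sums */
}

int main(void) {
  /* bf16 bit patterns: 0x3F80 = 1.0f, 0x4000 = 2.0f. */
  const uint16_t a[8] = {0x3F80, 0x3F80, 0x3F80, 0x3F80, 0x3F80, 0x3F80, 0x3F80, 0x3F80};
  const uint16_t b[8] = {0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000};
  printf("%f\n", bf16_dot8(a, b));  /* 8 * 1.0 * 2.0 = 16.0 */
  return 0;
}

bf16_dot8 also mirrors the accumulation pattern visible in the matches: one vfmaq_f32 per widened half into an f32 accumulator, reduced horizontally at the end.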
4x4c8-minmax-neonfma-shland.c (matches in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland):
  127:  const float32x4_t vb3o = vreinterpretq_f32_u16(vandq_u16(vb3, vmask));  (local declaration)
  141:  vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o);
  142:  vacc1x3 = vfmaq_f32(vacc1x3, va1o, vb3o);
  143:  vacc2x3 = vfmaq_f32(vacc2x3, va2o, vb3o);
  144:  vacc3x3 = vfmaq_f32(vacc3x3, va3o, vb3o);
  221:  const float32x4_t vb3o = vreinterpretq_f32_u16(vandq_u16(vb3, vmask));  (local declaration)
  252:  vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o);
  253:  vacc1x3 = vfmaq_f32(vacc1x3, va1x3o, vb3o);
  254:  vacc2x3 = vfmaq_f32(vacc2x3, va2x3o, vb3o);
  255:  vacc3x3 = vfmaq_f32(vacc3x3, va3x3o, vb3o);
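The shland variants reach the same widened lanes without zip instructions: each 32-bit lane of the raw vector holds two bf16 values, so a 16-bit left shift of the lane widens the even-indexed element, and an AND with a per-lane 0xFFFF0000 mask widens the odd-indexed one, which is the vandq_u16(vb3, vmask) pattern in these matches. A minimal sketch under the same little-endian assumption; widen_even and widen_odd are hypothetical names:

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

static inline float32x4_t widen_even(uint16x8_t v) {
  /* Shift each 32-bit lane left by 16: the even-indexed bf16 value moves
     into the high half, becoming its f32 counterpart. */
  return vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(v), 16));
}
static inline float32x4_t widen_odd(uint16x8_t v) {
  /* The odd-indexed value already occupies the high half of its lane;
     masking off the low 16 bits yields the widened f32 directly. */
  const uint16x8_t vmask = vreinterpretq_u16_u32(vmovq_n_u32(UINT32_C(0xFFFF0000)));
  return vreinterpretq_f32_u16(vandq_u16(v, vmask));
}

int main(void) {
  /* bf16 bit patterns: 0x3F80 = 1.0f, 0x4040 = 3.0f, alternating. */
  const uint16_t x[8] = {0x3F80, 0x4040, 0x3F80, 0x4040, 0x3F80, 0x4040, 0x3F80, 0x4040};
  const uint16x8_t vx = vld1q_u16(x);
  printf("even lane 0 = %f, odd lane 0 = %f\n",
         (double)vgetq_lane_f32(widen_even(vx), 0),   /* 1.0 */
         (double)vgetq_lane_f32(widen_odd(vx), 0));   /* 3.0 */
  return 0;
}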
5x4c8-minmax-neonfma-zip.c (matches in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip):
  144:  const float32x4_t vb3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb3));  (local declaration)
  161:  vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o);
  162:  vacc1x3 = vfmaq_f32(vacc1x3, va1o, vb3o);
  163:  vacc2x3 = vfmaq_f32(vacc2x3, va2o, vb3o);
  164:  vacc3x3 = vfmaq_f32(vacc3x3, va3o, vb3o);
  165:  vacc4x3 = vfmaq_f32(vacc4x3, va4o, vb3o);
  255:  const float32x4_t vb3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb3));  (local declaration)
  293:  vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o);
  294:  vacc1x3 = vfmaq_f32(vacc1x3, va1x3o, vb3o);
  295:  vacc2x3 = vfmaq_f32(vacc2x3, va2x3o, vb3o);
  [all …]
5x4c8-minmax-neonfma-shland.c (matches in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland):
  144:  const float32x4_t vb3o = vreinterpretq_f32_u16(vandq_u16(vb3, vmask));  (local declaration)
  161:  vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o);
  162:  vacc1x3 = vfmaq_f32(vacc1x3, va1o, vb3o);
  163:  vacc2x3 = vfmaq_f32(vacc2x3, va2o, vb3o);
  164:  vacc3x3 = vfmaq_f32(vacc3x3, va3o, vb3o);
  165:  vacc4x3 = vfmaq_f32(vacc4x3, va4o, vb3o);
  255:  const float32x4_t vb3o = vreinterpretq_f32_u16(vandq_u16(vb3, vmask));  (local declaration)
  293:  vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o);
  294:  vacc1x3 = vfmaq_f32(vacc1x3, va1x3o, vb3o);
  295:  vacc2x3 = vfmaq_f32(vacc2x3, va2x3o, vb3o);
  [all …]
3x4c8-minmax-neonfma-shland.c (matches in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland):
  110:  const float32x4_t vb3o = vreinterpretq_f32_u16(vandq_u16(vb3, vmask));  (local declaration)
  121:  vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o);
  122:  vacc1x3 = vfmaq_f32(vacc1x3, va1o, vb3o);
  123:  vacc2x3 = vfmaq_f32(vacc2x3, va2o, vb3o);
  187:  const float32x4_t vb3o = vreinterpretq_f32_u16(vandq_u16(vb3, vmask));  (local declaration)
  211:  vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o);
  212:  vacc1x3 = vfmaq_f32(vacc1x3, va1x3o, vb3o);
  213:  vacc2x3 = vfmaq_f32(vacc2x3, va2x3o, vb3o);
3x4c8-minmax-neonfma-zip.c (matches in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip):
  110:  const float32x4_t vb3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb3));  (local declaration)
  121:  vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o);
  122:  vacc1x3 = vfmaq_f32(vacc1x3, va1o, vb3o);
  123:  vacc2x3 = vfmaq_f32(vacc2x3, va2o, vb3o);
  187:  const float32x4_t vb3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb3));  (local declaration)
  211:  vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o);
  212:  vacc1x3 = vfmaq_f32(vacc1x3, va1x3o, vb3o);
  213:  vacc2x3 = vfmaq_f32(vacc2x3, va2x3o, vb3o);
2x4c8-minmax-neonfma-shland.c (matches in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland):
  93:   const float32x4_t vb3o = vreinterpretq_f32_u16(vandq_u16(vb3, vmask));  (local declaration)
  101:  vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o);
  102:  vacc1x3 = vfmaq_f32(vacc1x3, va1o, vb3o);
  153:  const float32x4_t vb3o = vreinterpretq_f32_u16(vandq_u16(vb3, vmask));  (local declaration)
  170:  vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o);
  171:  vacc1x3 = vfmaq_f32(vacc1x3, va1x3o, vb3o);
2x4c8-minmax-neonfma-zip.c (matches in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip):
  93:   const float32x4_t vb3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb3));  (local declaration)
  101:  vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o);
  102:  vacc1x3 = vfmaq_f32(vacc1x3, va1o, vb3o);
  153:  const float32x4_t vb3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb3));  (local declaration)
  170:  vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o);
  171:  vacc1x3 = vfmaq_f32(vacc1x3, va1x3o, vb3o);
1x4c8-minmax-neonfma-zip.c (matches in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip):
  76:   const float32x4_t vb3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb3));  (local declaration)
  81:   vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o);
  119:  const float32x4_t vb3o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb3));  (local declaration)
  129:  vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o);
1x4c8-minmax-neonfma-shland.c (matches in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland):
  76:   const float32x4_t vb3o = vreinterpretq_f32_u16(vandq_u16(vb3, vmask));  (local declaration)
  81:   vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o);
  119:  const float32x4_t vb3o = vreinterpretq_f32_u16(vandq_u16(vb3, vmask));  (local declaration)
  129:  vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o);
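Both variants compute the same set of products; they differ only in which NEON instructions perform the bf16-to-f32 widening (zip-with-zero versus shift/mask) and in how the products are grouped into accumulator lanes, so any numeric difference is at most floating-point summation order, and the preferable variant is presumably a microarchitecture-dependent choice. The 1x through 5x in the file names is the number of rows of A each kernel handles per call, which is why the fan-out of vaccNx3 FMA lines per vb3o declaration grows from one to five across these results.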