
Searched refs:vb2o (Results 1 – 10 of 10) sorted by relevance

/external/XNNPACK/src/bf16-gemm/gen/
4x4c8-minmax-neonfma-zip.c
126 const float32x4_t vb2o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb2)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip() local
137 vacc0x2 = vfmaq_f32(vacc0x2, va0o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
138 vacc1x2 = vfmaq_f32(vacc1x2, va1o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
139 vacc2x2 = vfmaq_f32(vacc2x2, va2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
140 vacc3x2 = vfmaq_f32(vacc3x2, va3o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
220 const float32x4_t vb2o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb2)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip() local
248 vacc0x2 = vfmaq_f32(vacc0x2, va0x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
249 vacc1x2 = vfmaq_f32(vacc1x2, va1x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
250 vacc2x2 = vfmaq_f32(vacc2x2, va2x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
251 vacc3x2 = vfmaq_f32(vacc3x2, va3x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
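Note on the "zip" matches above: vb2o is built by interleaving the packed bf16 vector vb2 with a zero vector, so each bf16 value lands in the upper 16 bits of a 32-bit lane, which is exactly its float32 bit pattern. A minimal sketch of that expansion, assuming AArch64 NEON; the helper name and the lo/hi output naming are illustrative, not part of XNNPACK:

#include <arm_neon.h>

/* Sketch only, not the XNNPACK kernel: expand 8 packed bf16 values into two
   float32x4_t vectors the way the "zip" kernels do. Zipping with zero places
   each bf16 in the high half of a 32-bit lane, i.e. its float32 encoding. */
static inline void bf16x8_to_f32_zip(uint16x8_t vb, float32x4_t* vlo, float32x4_t* vhi) {
  const uint16x8_t vzero = vmovq_n_u16(0);
  *vlo = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb));  /* elements 0..3 */
  *vhi = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb));  /* elements 4..7, the vb2o pattern above */
}

The expanded vectors are then fed to plain vfmaq_f32 accumulations, as the matched lines show.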
4x4c8-minmax-neonfma-shland.c
126 const float32x4_t vb2o = vreinterpretq_f32_u16(vandq_u16(vb2, vmask)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland() local
137 vacc0x2 = vfmaq_f32(vacc0x2, va0o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
138 vacc1x2 = vfmaq_f32(vacc1x2, va1o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
139 vacc2x2 = vfmaq_f32(vacc2x2, va2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
140 vacc3x2 = vfmaq_f32(vacc3x2, va3o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
220 const float32x4_t vb2o = vreinterpretq_f32_u16(vandq_u16(vb2, vmask)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland() local
248 vacc0x2 = vfmaq_f32(vacc0x2, va0x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
249 vacc1x2 = vfmaq_f32(vacc1x2, va1x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
250 vacc2x2 = vfmaq_f32(vacc2x2, va2x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
251 vacc3x2 = vfmaq_f32(vacc3x2, va3x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
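The "shland" matches reach the same expansion with a shift and a mask instead of zips: the odd-indexed bf16 values (the vb2o term here) are kept by masking off the low 16 bits of each 32-bit lane, while the even-indexed values are recovered with a 16-bit left shift. A hedged sketch; the helper name and the vmask constant are assumptions inferred from the snippets, since its definition is not part of the matches:

#include <arm_neon.h>
#include <stdint.h>

/* Sketch only, not the XNNPACK kernel: the "shland" style bf16 -> f32 expansion.
   The mask keeps the high 16 bits of each 32-bit lane (assumed 0xFFFF0000). */
static inline void bf16x8_to_f32_shland(uint16x8_t vb, float32x4_t* veven, float32x4_t* vodd) {
  const uint16x8_t vmask = vreinterpretq_u16_u32(vmovq_n_u32(UINT32_C(0xFFFF0000)));
  /* even-indexed bf16: shift into the high half of each 32-bit lane */
  *veven = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb), 16));
  /* odd-indexed bf16: already in the high half, mask off the low half (the vb2o pattern above) */
  *vodd = vreinterpretq_f32_u16(vandq_u16(vb, vmask));
}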
5x4c8-minmax-neonfma-zip.c
143 const float32x4_t vb2o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb2)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip() local
156 vacc0x2 = vfmaq_f32(vacc0x2, va0o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
157 vacc1x2 = vfmaq_f32(vacc1x2, va1o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
158 vacc2x2 = vfmaq_f32(vacc2x2, va2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
159 vacc3x2 = vfmaq_f32(vacc3x2, va3o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
160 vacc4x2 = vfmaq_f32(vacc4x2, va4o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
254 const float32x4_t vb2o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb2)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip() local
288 vacc0x2 = vfmaq_f32(vacc0x2, va0x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
289 vacc1x2 = vfmaq_f32(vacc1x2, va1x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
290 vacc2x2 = vfmaq_f32(vacc2x2, va2x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
[all …]
5x4c8-minmax-neonfma-shland.c
143 const float32x4_t vb2o = vreinterpretq_f32_u16(vandq_u16(vb2, vmask)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland() local
156 vacc0x2 = vfmaq_f32(vacc0x2, va0o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
157 vacc1x2 = vfmaq_f32(vacc1x2, va1o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
158 vacc2x2 = vfmaq_f32(vacc2x2, va2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
159 vacc3x2 = vfmaq_f32(vacc3x2, va3o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
160 vacc4x2 = vfmaq_f32(vacc4x2, va4o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
254 const float32x4_t vb2o = vreinterpretq_f32_u16(vandq_u16(vb2, vmask)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland() local
288 vacc0x2 = vfmaq_f32(vacc0x2, va0x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
289 vacc1x2 = vfmaq_f32(vacc1x2, va1x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
290 vacc2x2 = vfmaq_f32(vacc2x2, va2x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
[all …]
3x4c8-minmax-neonfma-shland.c
109 const float32x4_t vb2o = vreinterpretq_f32_u16(vandq_u16(vb2, vmask)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() local
118 vacc0x2 = vfmaq_f32(vacc0x2, va0o, vb2o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
119 vacc1x2 = vfmaq_f32(vacc1x2, va1o, vb2o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
120 vacc2x2 = vfmaq_f32(vacc2x2, va2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
186 const float32x4_t vb2o = vreinterpretq_f32_u16(vandq_u16(vb2, vmask)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() local
208 vacc0x2 = vfmaq_f32(vacc0x2, va0x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
209 vacc1x2 = vfmaq_f32(vacc1x2, va1x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
210 vacc2x2 = vfmaq_f32(vacc2x2, va2x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
3x4c8-minmax-neonfma-zip.c
109 const float32x4_t vb2o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb2)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() local
118 vacc0x2 = vfmaq_f32(vacc0x2, va0o, vb2o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
119 vacc1x2 = vfmaq_f32(vacc1x2, va1o, vb2o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
120 vacc2x2 = vfmaq_f32(vacc2x2, va2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
186 const float32x4_t vb2o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb2)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() local
208 vacc0x2 = vfmaq_f32(vacc0x2, va0x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
209 vacc1x2 = vfmaq_f32(vacc1x2, va1x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
210 vacc2x2 = vfmaq_f32(vacc2x2, va2x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
2x4c8-minmax-neonfma-shland.c
92 const float32x4_t vb2o = vreinterpretq_f32_u16(vandq_u16(vb2, vmask)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland() local
99 vacc0x2 = vfmaq_f32(vacc0x2, va0o, vb2o); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
100 vacc1x2 = vfmaq_f32(vacc1x2, va1o, vb2o); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
152 const float32x4_t vb2o = vreinterpretq_f32_u16(vandq_u16(vb2, vmask)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland() local
168 vacc0x2 = vfmaq_f32(vacc0x2, va0x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
169 vacc1x2 = vfmaq_f32(vacc1x2, va1x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
2x4c8-minmax-neonfma-zip.c
92 const float32x4_t vb2o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb2)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip() local
99 vacc0x2 = vfmaq_f32(vacc0x2, va0o, vb2o); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
100 vacc1x2 = vfmaq_f32(vacc1x2, va1o, vb2o); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
152 const float32x4_t vb2o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb2)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip() local
168 vacc0x2 = vfmaq_f32(vacc0x2, va0x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
169 vacc1x2 = vfmaq_f32(vacc1x2, va1x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
1x4c8-minmax-neonfma-zip.c
75 const float32x4_t vb2o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb2)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip() local
80 vacc0x2 = vfmaq_f32(vacc0x2, va0o, vb2o); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip()
118 const float32x4_t vb2o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb2)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip() local
128 vacc0x2 = vfmaq_f32(vacc0x2, va0x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip()
1x4c8-minmax-neonfma-shland.c
75 const float32x4_t vb2o = vreinterpretq_f32_u16(vandq_u16(vb2, vmask)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland() local
80 vacc0x2 = vfmaq_f32(vacc0x2, va0o, vb2o); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland()
118 const float32x4_t vb2o = vreinterpretq_f32_u16(vandq_u16(vb2, vmask)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland() local
128 vacc0x2 = vfmaq_f32(vacc0x2, va0x2o, vb2o); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland()