
Searched refs:vb0o (Results 1 – 10 of 10) sorted by relevance

/external/XNNPACK/src/bf16-gemm/gen/
4x4c8-minmax-neonfma-zip.c (all matches in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip)
  124  const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0));  (local definition)
  129  vacc0x0 = vfmaq_f32(vacc0x0, va0o, vb0o);
  130  vacc1x0 = vfmaq_f32(vacc1x0, va1o, vb0o);
  131  vacc2x0 = vfmaq_f32(vacc2x0, va2o, vb0o);
  132  vacc3x0 = vfmaq_f32(vacc3x0, va3o, vb0o);
  218  const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0));  (local definition)
  240  vacc0x0 = vfmaq_f32(vacc0x0, va0x0o, vb0o);
  241  vacc1x0 = vfmaq_f32(vacc1x0, va1x0o, vb0o);
  242  vacc2x0 = vfmaq_f32(vacc2x0, va2x0o, vb0o);
  243  vacc3x0 = vfmaq_f32(vacc3x0, va3x0o, vb0o);
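In the zip kernels above, vb0o is the upper four bf16 elements of a packed B vector widened to f32. Since a bf16 value is just the high 16 bits of the corresponding f32, interleaving a zero half-word below each bf16 half-word produces valid f32 bit patterns directly. A minimal sketch of that widening, assuming AArch64 NEON (<arm_neon.h>); the helper names are hypothetical, not XNNPACK's:

#include <arm_neon.h>

/* Widen bf16 elements 4..7 of a packed vector to f32, as the vb0o lines
 * above do: interleaving zeros below the bf16 half-words makes each 32-bit
 * lane (bf16 << 16), which is exactly the equivalent f32 bit pattern. */
static inline float32x4_t widen_bf16_high_zip(uint16x8_t vb) {
  const uint16x8_t vzero = vmovq_n_u16(0);
  return vreinterpretq_f32_u16(vzip2q_u16(vzero, vb));
}

/* The matching vb0e in these kernels widens elements 0..3 with vzip1q_u16. */
static inline float32x4_t widen_bf16_low_zip(uint16x8_t vb) {
  const uint16x8_t vzero = vmovq_n_u16(0);
  return vreinterpretq_f32_u16(vzip1q_u16(vzero, vb));
}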
4x4c8-minmax-neonfma-shland.c (all matches in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland)
  124  const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask));  (local definition)
  129  vacc0x0 = vfmaq_f32(vacc0x0, va0o, vb0o);
  130  vacc1x0 = vfmaq_f32(vacc1x0, va1o, vb0o);
  131  vacc2x0 = vfmaq_f32(vacc2x0, va2o, vb0o);
  132  vacc3x0 = vfmaq_f32(vacc3x0, va3o, vb0o);
  218  const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask));  (local definition)
  240  vacc0x0 = vfmaq_f32(vacc0x0, va0x0o, vb0o);
  241  vacc1x0 = vfmaq_f32(vacc1x0, va1x0o, vb0o);
  242  vacc2x0 = vfmaq_f32(vacc2x0, va2x0o, vb0o);
  243  vacc3x0 = vfmaq_f32(vacc3x0, va3x0o, vb0o);
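The shland kernels compute the same vb0o without interleaving: odd bf16 lanes already sit in the high half of each 32-bit lane on a little-endian target, so masking the even lanes to zero leaves valid f32 bit patterns in place, and the even lanes are widened with a 16-bit left shift instead (hence "shl" + "and"). A minimal sketch under the same assumptions, with hypothetical helper names:

#include <arm_neon.h>

/* Odd bf16 lanes (1, 3, 5, 7) occupy the high 16 bits of each 32-bit lane;
 * zeroing the even lanes mirrors the vandq_u16(vb0, vmask) lines above. */
static inline float32x4_t widen_bf16_odd_shland(uint16x8_t vb) {
  const uint16x8_t vmask = vreinterpretq_u16_u32(vmovq_n_u32(0xFFFF0000u));
  return vreinterpretq_f32_u16(vandq_u16(vb, vmask));
}

/* Even bf16 lanes (0, 2, 4, 6) are shifted into the high half instead;
 * this is the vb0e counterpart in the same kernels. */
static inline float32x4_t widen_bf16_even_shland(uint16x8_t vb) {
  return vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb), 16));
}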
5x4c8-minmax-neonfma-zip.c (all matches in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip)
  141  const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0));  (local definition)
  146  vacc0x0 = vfmaq_f32(vacc0x0, va0o, vb0o);
  147  vacc1x0 = vfmaq_f32(vacc1x0, va1o, vb0o);
  148  vacc2x0 = vfmaq_f32(vacc2x0, va2o, vb0o);
  149  vacc3x0 = vfmaq_f32(vacc3x0, va3o, vb0o);
  150  vacc4x0 = vfmaq_f32(vacc4x0, va4o, vb0o);
  252  const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0));  (local definition)
  278  vacc0x0 = vfmaq_f32(vacc0x0, va0x0o, vb0o);
  279  vacc1x0 = vfmaq_f32(vacc1x0, va1x0o, vb0o);
  280  vacc2x0 = vfmaq_f32(vacc2x0, va2x0o, vb0o);
  [all …]
5x4c8-minmax-neonfma-shland.c (all matches in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland)
  141  const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask));  (local definition)
  146  vacc0x0 = vfmaq_f32(vacc0x0, va0o, vb0o);
  147  vacc1x0 = vfmaq_f32(vacc1x0, va1o, vb0o);
  148  vacc2x0 = vfmaq_f32(vacc2x0, va2o, vb0o);
  149  vacc3x0 = vfmaq_f32(vacc3x0, va3o, vb0o);
  150  vacc4x0 = vfmaq_f32(vacc4x0, va4o, vb0o);
  252  const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask));  (local definition)
  278  vacc0x0 = vfmaq_f32(vacc0x0, va0x0o, vb0o);
  279  vacc1x0 = vfmaq_f32(vacc1x0, va1x0o, vb0o);
  280  vacc2x0 = vfmaq_f32(vacc2x0, va2x0o, vb0o);
  [all …]
3x4c8-minmax-neonfma-shland.c (all matches in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland)
  107  const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask));  (local definition)
  112  vacc0x0 = vfmaq_f32(vacc0x0, va0o, vb0o);
  113  vacc1x0 = vfmaq_f32(vacc1x0, va1o, vb0o);
  114  vacc2x0 = vfmaq_f32(vacc2x0, va2o, vb0o);
  184  const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask));  (local definition)
  202  vacc0x0 = vfmaq_f32(vacc0x0, va0x0o, vb0o);
  203  vacc1x0 = vfmaq_f32(vacc1x0, va1x0o, vb0o);
  204  vacc2x0 = vfmaq_f32(vacc2x0, va2x0o, vb0o);
3x4c8-minmax-neonfma-zip.c (all matches in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip)
  107  const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0));  (local definition)
  112  vacc0x0 = vfmaq_f32(vacc0x0, va0o, vb0o);
  113  vacc1x0 = vfmaq_f32(vacc1x0, va1o, vb0o);
  114  vacc2x0 = vfmaq_f32(vacc2x0, va2o, vb0o);
  184  const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0));  (local definition)
  202  vacc0x0 = vfmaq_f32(vacc0x0, va0x0o, vb0o);
  203  vacc1x0 = vfmaq_f32(vacc1x0, va1x0o, vb0o);
  204  vacc2x0 = vfmaq_f32(vacc2x0, va2x0o, vb0o);
2x4c8-minmax-neonfma-shland.c (all matches in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland)
  90   const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask));  (local definition)
  95   vacc0x0 = vfmaq_f32(vacc0x0, va0o, vb0o);
  96   vacc1x0 = vfmaq_f32(vacc1x0, va1o, vb0o);
  150  const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask));  (local definition)
  164  vacc0x0 = vfmaq_f32(vacc0x0, va0x0o, vb0o);
  165  vacc1x0 = vfmaq_f32(vacc1x0, va1x0o, vb0o);
2x4c8-minmax-neonfma-zip.c (all matches in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip)
  90   const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0));  (local definition)
  95   vacc0x0 = vfmaq_f32(vacc0x0, va0o, vb0o);
  96   vacc1x0 = vfmaq_f32(vacc1x0, va1o, vb0o);
  150  const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0));  (local definition)
  164  vacc0x0 = vfmaq_f32(vacc0x0, va0x0o, vb0o);
  165  vacc1x0 = vfmaq_f32(vacc1x0, va1x0o, vb0o);
1x4c8-minmax-neonfma-zip.c (all matches in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip)
  73   const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0));  (local definition)
  78   vacc0x0 = vfmaq_f32(vacc0x0, va0o, vb0o);
  116  const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0));  (local definition)
  126  vacc0x0 = vfmaq_f32(vacc0x0, va0x0o, vb0o);
1x4c8-minmax-neonfma-shland.c (all matches in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland)
  73   const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask));  (local definition)
  78   vacc0x0 = vfmaq_f32(vacc0x0, va0o, vb0o);
  116  const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask));  (local definition)
  126  vacc0x0 = vfmaq_f32(vacc0x0, va0x0o, vb0o);
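In every variant the even and odd halves feed the same accumulators (vacc0x0 and friends), so lane order does not matter; a horizontal reduction at the end of the kernel recovers the dot product. A hypothetical end-to-end check of one 8-element bf16 dot product using the zip split (AArch64 assumed for vzip2q_u16 and vaddvq_f32; the function name is illustrative only):

#include <arm_neon.h>

/* Dot product of 8 bf16 values, split into two f32 FMA steps the way the
 * kernels above are: elements 0..3 via vzip1q_u16, elements 4..7 via
 * vzip2q_u16, both accumulating into one float32x4_t. */
static float bf16_dot8(uint16x8_t va, uint16x8_t vb) {
  const uint16x8_t vzero = vmovq_n_u16(0);
  float32x4_t vacc = vmovq_n_f32(0.0f);
  vacc = vfmaq_f32(vacc,
                   vreinterpretq_f32_u16(vzip1q_u16(vzero, va)),
                   vreinterpretq_f32_u16(vzip1q_u16(vzero, vb)));
  vacc = vfmaq_f32(vacc,
                   vreinterpretq_f32_u16(vzip2q_u16(vzero, va)),
                   vreinterpretq_f32_u16(vzip2q_u16(vzero, vb)));
  return vaddvq_f32(vacc);  /* horizontal sum of the four partial products */
}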