Home
last modified time | relevance | path

Searched refs:vb3e (Results 1 – 10 of 10) sorted by relevance

/external/XNNPACK/src/bf16-gemm/gen/
D4x4c8-minmax-neonfma-zip.c100 const float32x4_t vb3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip() local
114 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
115 vacc1x3 = vfmaq_f32(vacc1x3, va1e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
116 vacc2x3 = vfmaq_f32(vacc2x3, va2e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
117 vacc3x3 = vfmaq_f32(vacc3x3, va3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
165 const float32x4_t vb3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip() local
213 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
214 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
215 vacc2x3 = vfmaq_f32(vacc2x3, va2x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
216 vacc3x3 = vfmaq_f32(vacc3x3, va3x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
D4x4c8-minmax-neonfma-shland.c100 const float32x4_t vb3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb3), 16)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland() local
114 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
115 vacc1x3 = vfmaq_f32(vacc1x3, va1e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
116 vacc2x3 = vfmaq_f32(vacc2x3, va2e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
117 vacc3x3 = vfmaq_f32(vacc3x3, va3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
165 const float32x4_t vb3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb3), 16)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland() local
213 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
214 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
215 vacc2x3 = vfmaq_f32(vacc2x3, va2x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
216 vacc3x3 = vfmaq_f32(vacc3x3, va3x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
D5x4c8-minmax-neonfma-zip.c112 const float32x4_t vb3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip() local
129 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
130 vacc1x3 = vfmaq_f32(vacc1x3, va1e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
131 vacc2x3 = vfmaq_f32(vacc2x3, va2e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
132 vacc3x3 = vfmaq_f32(vacc3x3, va3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
133 vacc4x3 = vfmaq_f32(vacc4x3, va4e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
187 const float32x4_t vb3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb3)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip() local
246 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
247 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
248 vacc2x3 = vfmaq_f32(vacc2x3, va2x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
[all …]
D5x4c8-minmax-neonfma-shland.c112 const float32x4_t vb3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb3), 16)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland() local
129 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
130 vacc1x3 = vfmaq_f32(vacc1x3, va1e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
131 vacc2x3 = vfmaq_f32(vacc2x3, va2e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
132 vacc3x3 = vfmaq_f32(vacc3x3, va3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
133 vacc4x3 = vfmaq_f32(vacc4x3, va4e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
187 const float32x4_t vb3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb3), 16)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland() local
246 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
247 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
248 vacc2x3 = vfmaq_f32(vacc2x3, va2x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
[all …]
D3x4c8-minmax-neonfma-shland.c88 const float32x4_t vb3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb3), 16)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() local
99 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
100 vacc1x3 = vfmaq_f32(vacc1x3, va1e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
101 vacc2x3 = vfmaq_f32(vacc2x3, va2e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
143 const float32x4_t vb3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb3), 16)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() local
180 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
181 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
182 vacc2x3 = vfmaq_f32(vacc2x3, va2x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
D3x4c8-minmax-neonfma-zip.c88 const float32x4_t vb3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() local
99 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
100 vacc1x3 = vfmaq_f32(vacc1x3, va1e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
101 vacc2x3 = vfmaq_f32(vacc2x3, va2e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
143 const float32x4_t vb3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() local
180 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
181 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
182 vacc2x3 = vfmaq_f32(vacc2x3, va2x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
D2x4c8-minmax-neonfma-shland.c76 const float32x4_t vb3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb3), 16)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland() local
84 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
85 vacc1x3 = vfmaq_f32(vacc1x3, va1e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
121 const float32x4_t vb3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb3), 16)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland() local
147 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
148 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
D2x4c8-minmax-neonfma-zip.c76 const float32x4_t vb3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip() local
84 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
85 vacc1x3 = vfmaq_f32(vacc1x3, va1e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
121 const float32x4_t vb3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip() local
147 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
148 vacc1x3 = vfmaq_f32(vacc1x3, va1x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
D1x4c8-minmax-neonfma-zip.c64 const float32x4_t vb3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb3)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip() local
69 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip()
99 const float32x4_t vb3e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb3)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip() local
114 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip()
D1x4c8-minmax-neonfma-shland.c64 const float32x4_t vb3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb3), 16)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland() local
69 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland()
99 const float32x4_t vb3e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb3), 16)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland() local
114 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland()