/external/XNNPACK/src/bf16-gemm/gen/ |
D | 1x4c8-minmax-neonfma-zip.c | 73 const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip() local 116 const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip() local
|
D | 1x4c8-minmax-neonfma-shland.c | 73 const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland() local 116 const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland() local
|
D | 2x4c8-minmax-neonfma-shland.c | 90 const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland() local 150 const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland() local
|
D | 2x4c8-minmax-neonfma-zip.c | 90 const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip() local 150 const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip() local
|
D | 3x4c8-minmax-neonfma-zip.c | 107 const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() local 184 const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() local
|
D | 3x4c8-minmax-neonfma-shland.c | 107 const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() local 184 const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() local
|
D | 4x4c8-minmax-neonfma-shland.c | 124 const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland() local 218 const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland() local
|
D | 4x4c8-minmax-neonfma-zip.c | 124 const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip() local 218 const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip() local
|
D | 5x4c8-minmax-neonfma-zip.c | 141 const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip() local 252 const float32x4_t vb0o = vreinterpretq_f32_u16(vzip2q_u16(vzero, vb0)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip() local
|
D | 5x4c8-minmax-neonfma-shland.c | 141 const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland() local 252 const float32x4_t vb0o = vreinterpretq_f32_u16(vandq_u16(vb0, vmask)); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland() local
|