Definitions of the local variable vb2e under /external/XNNPACK/src/bf16-gemm/gen/ (the code at both listed lines of each file is identical):

| D | File | Lines | Definition | Function | Scope |
|---|------|-------|------------|----------|-------|
| D | 1x4c8-minmax-neonfma-zip.c | 63, 98 | const float32x4_t vb2e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb2)); | xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip() | local |
| D | 1x4c8-minmax-neonfma-shland.c | 63, 98 | const float32x4_t vb2e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb2), 16)); | xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland() | local |
| D | 2x4c8-minmax-neonfma-shland.c | 75, 120 | const float32x4_t vb2e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb2), 16)); | xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland() | local |
| D | 2x4c8-minmax-neonfma-zip.c | 75, 120 | const float32x4_t vb2e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb2)); | xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip() | local |
| D | 3x4c8-minmax-neonfma-zip.c | 87, 142 | const float32x4_t vb2e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb2)); | xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() | local |
| D | 3x4c8-minmax-neonfma-shland.c | 87, 142 | const float32x4_t vb2e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb2), 16)); | xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() | local |
| D | 4x4c8-minmax-neonfma-shland.c | 99, 164 | const float32x4_t vb2e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb2), 16)); | xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland() | local |
| D | 4x4c8-minmax-neonfma-zip.c | 99, 164 | const float32x4_t vb2e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb2)); | xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip() | local |
| D | 5x4c8-minmax-neonfma-zip.c | 111, 186 | const float32x4_t vb2e = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb2)); | xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip() | local |
| D | 5x4c8-minmax-neonfma-shland.c | 111, 186 | const float32x4_t vb2e = vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb2), 16)); | xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland() | local |
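The two kernel families in this listing differ only in how they widen packed bf16 lanes to f32 before the FMA: the *-zip variants interleave a zero vector below the bf16 values, while the *-shland variants shift 32-bit lanes left by 16. The sketch below illustrates both idioms under stated assumptions; it is not code from these files, and the input data and names (vb, vzero, ve_zip, ve_shl) are illustrative, not taken from the XNNPACK sources.

```c
// Minimal AArch64 NEON sketch of the bf16 -> f32 widening idioms used by the
// *-zip and *-shland kernels listed above. Illustrative only.
#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  // Eight bf16 bit patterns (the upper 16 bits of the f32 values
  // 1.0, 2.0, -3.0, 0.5, 4.0, 5.0, -6.0, 7.0).
  const uint16_t bf16[8] = {
      0x3F80, 0x4000, 0xC040, 0x3F00, 0x4080, 0x40A0, 0xC0C0, 0x40E0};
  const uint16x8_t vb = vld1q_u16(bf16);

  // zip idiom: interleave zeros below the low four bf16 elements, so each
  // bf16 pattern lands in the upper half of a 32-bit lane. The result holds
  // elements 0..3, i.e. {1.0, 2.0, -3.0, 0.5}.
  const uint16x8_t vzero = vmovq_n_u16(0);
  const float32x4_t ve_zip = vreinterpretq_f32_u16(vzip1q_u16(vzero, vb));

  // shland idiom: reinterpret as four 32-bit lanes and shift each left by 16,
  // which promotes the even-indexed bf16 elements (0, 2, 4, 6), i.e.
  // {1.0, -3.0, 4.0, -6.0}. In the full kernels this is paired with a
  // mask step (the "and" in shland) that presumably recovers the
  // odd-indexed elements already sitting in the upper halves.
  const float32x4_t ve_shl =
      vreinterpretq_f32_u32(vshlq_n_u32(vreinterpretq_u32_u16(vb), 16));

  float zip_out[4], shl_out[4];
  vst1q_f32(zip_out, ve_zip);
  vst1q_f32(shl_out, ve_shl);
  printf("zip:    %g %g %g %g\n", zip_out[0], zip_out[1], zip_out[2], zip_out[3]);
  printf("shland: %g %g %g %g\n", shl_out[0], shl_out[1], shl_out[2], shl_out[3]);
  return 0;
}
```

Note that the two idioms produce different lane orderings (first four elements vs. even-indexed elements); since the c8 kernels accumulate and then reduce across the whole 8-element channel block, this ordering difference should not change the final dot product.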