/external/llvm-project/clang/test/CodeGen/ |
D | aarch64-neon-fma.c | 13 float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) { in test_vmla_n_f32() 57 float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) { in test_vmls_n_f32() 78 float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { in test_vmla_lane_f32_0() 89 float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { in test_vmlaq_lane_f32_0() 100 float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { in test_vmla_laneq_f32_0() 122 float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { in test_vmls_lane_f32_0() 133 float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { in test_vmlsq_lane_f32_0() 144 float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { in test_vmls_laneq_f32_0() 166 float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { in test_vmla_lane_f32() 177 float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { in test_vmlaq_lane_f32() [all …]
|
D | aarch64-neon-fp16fml.c | 21 float32x2_t test_vfmlal_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) { in test_vfmlal_low_f16() 33 float32x2_t test_vfmlsl_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) { in test_vfmlsl_low_f16() 45 float32x2_t test_vfmlal_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) { in test_vfmlal_high_f16() 57 float32x2_t test_vfmlsl_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) { in test_vfmlsl_high_f16() 159 float32x2_t test_vfmlal_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) { in test_vfmlal_lane_low_f16() 211 float32x2_t test_vfmlal_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) { in test_vfmlal_lane_high_f16() 447 float32x2_t test_vfmlal_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t c) { in test_vfmlal_laneq_low_f16() 499 float32x2_t test_vfmlal_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t c) { in test_vfmlal_laneq_high_f16() 735 float32x2_t test_vfmlsl_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) { in test_vfmlsl_lane_low_f16() 787 float32x2_t test_vfmlsl_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) { in test_vfmlsl_lane_high_f16() [all …]
|
D | arm-neon-directed-rounding.c | 14 float32x2_t test_vrnda_f32(float32x2_t a) { in test_vrnda_f32() 30 float32x2_t test_vrndm_f32(float32x2_t a) { in test_vrndm_f32() 46 float32x2_t test_vrndn_f32(float32x2_t a) { in test_vrndn_f32() 62 float32x2_t test_vrndp_f32(float32x2_t a) { in test_vrndp_f32() 78 float32x2_t test_vrndx_f32(float32x2_t a) { in test_vrndx_f32() 94 float32x2_t test_vrnd_f32(float32x2_t a) { in test_vrnd_f32() 118 float32x2_t test_vrndi_f32(float32x2_t a) { in test_vrndi_f32()
|
D | aarch64-neon-intrinsics-constrained.c | 34 float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) { in test_vadd_f32() 52 float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) { in test_vsub_f32() 79 float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) { in test_vmul_f32() 109 float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { in test_vmla_f32() 145 float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { in test_vmls_f32() 181 float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { in test_vfma_f32() 219 float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { in test_vfms_f32() 274 float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) { in test_vdiv_f32() 284 uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) { in test_vceq_f32() 325 uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) { in test_vcge_f32() [all …]
|
D | arm-neon-fma.c | 13 float32x2_t test_fma_order(float32x2_t accum, float32x2_t lhs, float32x2_t rhs) { in test_fma_order() 31 float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) { in test_vfma_n_f32()
|
D | arm-neon-vcvtX.c | 8 int32x2_t test_vcvta_s32_f32(float32x2_t a) { in test_vcvta_s32_f32() 15 uint32x2_t test_vcvta_u32_f32(float32x2_t a) { in test_vcvta_u32_f32() 36 int32x2_t test_vcvtn_s32_f32(float32x2_t a) { in test_vcvtn_s32_f32() 43 uint32x2_t test_vcvtn_u32_f32(float32x2_t a) { in test_vcvtn_u32_f32() 64 int32x2_t test_vcvtp_s32_f32(float32x2_t a) { in test_vcvtp_s32_f32() 71 uint32x2_t test_vcvtp_u32_f32(float32x2_t a) { in test_vcvtp_u32_f32() 92 int32x2_t test_vcvtm_s32_f32(float32x2_t a) { in test_vcvtm_s32_f32() 99 uint32x2_t test_vcvtm_u32_f32(float32x2_t a) { in test_vcvtm_u32_f32()
|
D | arm-neon-numeric-maxmin.c | 8 float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { in test_vmaxnm_f32() 22 float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { in test_vminnm_f32()
|
/external/clang/test/CodeGen/ |
D | aarch64-neon-fma.c | 13 float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) { in test_vmla_n_f32() 57 float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) { in test_vmls_n_f32() 76 float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { in test_vmla_lane_f32_0() 85 float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { in test_vmlaq_lane_f32_0() 94 float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { in test_vmla_laneq_f32_0() 112 float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { in test_vmls_lane_f32_0() 121 float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { in test_vmlsq_lane_f32_0() 130 float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { in test_vmls_laneq_f32_0() 148 float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { in test_vmla_lane_f32() 157 float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { in test_vmlaq_lane_f32() [all …]
|
D | arm-neon-directed-rounding.c | 12 float32x2_t test_vrnda_f32(float32x2_t a) { in test_vrnda_f32() 34 float32x2_t test_vrndm_f32(float32x2_t a) { in test_vrndm_f32() 56 float32x2_t test_vrndn_f32(float32x2_t a) { in test_vrndn_f32() 78 float32x2_t test_vrndp_f32(float32x2_t a) { in test_vrndp_f32() 100 float32x2_t test_vrndx_f32(float32x2_t a) { in test_vrndx_f32() 122 float32x2_t test_vrnd_f32(float32x2_t a) { in test_vrnd_f32()
|
D | arm-neon-vcvtX.c | 10 int32x2_t test_vcvta_s32_f32(float32x2_t a) { in test_vcvta_s32_f32() 19 uint32x2_t test_vcvta_u32_f32(float32x2_t a) { in test_vcvta_u32_f32() 46 int32x2_t test_vcvtn_s32_f32(float32x2_t a) { in test_vcvtn_s32_f32() 55 uint32x2_t test_vcvtn_u32_f32(float32x2_t a) { in test_vcvtn_u32_f32() 82 int32x2_t test_vcvtp_s32_f32(float32x2_t a) { in test_vcvtp_s32_f32() 91 uint32x2_t test_vcvtp_u32_f32(float32x2_t a) { in test_vcvtp_u32_f32() 118 int32x2_t test_vcvtm_s32_f32(float32x2_t a) { in test_vcvtm_s32_f32() 127 uint32x2_t test_vcvtm_u32_f32(float32x2_t a) { in test_vcvtm_u32_f32()
|
D | arm-neon-numeric-maxmin.c | 14 float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { in test_vmaxnm_f32() 40 float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { in test_vminnm_f32()
|
D | aarch64-neon-misc.c | 177 uint32x2_t test_vceqz_f32(float32x2_t a) { in test_vceqz_f32() 339 uint32x2_t test_vcgez_f32(float32x2_t a) { in test_vcgez_f32() 455 uint32x2_t test_vclez_f32(float32x2_t a) { in test_vclez_f32() 571 uint32x2_t test_vcgtz_f32(float32x2_t a) { in test_vcgtz_f32() 687 uint32x2_t test_vcltz_f32(float32x2_t a) { in test_vcltz_f32() 906 float32x2_t test_vrev64_f32(float32x2_t a) { in test_vrev64_f32() 1399 float32x2_t test_vneg_f32(float32x2_t a) { in test_vneg_f32() 1481 float32x2_t test_vabs_f32(float32x2_t a) { in test_vabs_f32() 2383 float32x2_t test_vcvt_f32_f64(float64x2_t a) { in test_vcvt_f32_f64() 2393 float32x4_t test_vcvt_high_f32_f64(float32x2_t a, float64x2_t b) { in test_vcvt_high_f32_f64() [all …]
|
/external/XNNPACK/src/f32-ibilinear-chw/gen/ |
D | neonfma-p4.c | 49 const float32x2_t vtltr0 = vld1_f32(itl0); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() 50 const float32x2_t vblbr0 = vld1_f32(ibl0); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() 51 const float32x2_t vtltr1 = vld1_f32(itl1); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() 52 const float32x2_t vblbr1 = vld1_f32(ibl1); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() 53 const float32x2_t vtltr2 = vld1_f32(itl2); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() 54 const float32x2_t vblbr2 = vld1_f32(ibl2); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() 55 const float32x2_t vtltr3 = vld1_f32(itl3); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() 56 const float32x2_t vblbr3 = vld1_f32(ibl3); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() 92 const float32x2_t valphah = vw.val[0]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() 93 const float32x2_t valphav = vw.val[1]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() [all …]
|
D | neon-p4.c | 49 const float32x2_t vtltr0 = vld1_f32(itl0); in xnn_f32_ibilinear_chw_ukernel__neon_p4() 50 const float32x2_t vblbr0 = vld1_f32(ibl0); in xnn_f32_ibilinear_chw_ukernel__neon_p4() 51 const float32x2_t vtltr1 = vld1_f32(itl1); in xnn_f32_ibilinear_chw_ukernel__neon_p4() 52 const float32x2_t vblbr1 = vld1_f32(ibl1); in xnn_f32_ibilinear_chw_ukernel__neon_p4() 53 const float32x2_t vtltr2 = vld1_f32(itl2); in xnn_f32_ibilinear_chw_ukernel__neon_p4() 54 const float32x2_t vblbr2 = vld1_f32(ibl2); in xnn_f32_ibilinear_chw_ukernel__neon_p4() 55 const float32x2_t vtltr3 = vld1_f32(itl3); in xnn_f32_ibilinear_chw_ukernel__neon_p4() 56 const float32x2_t vblbr3 = vld1_f32(ibl3); in xnn_f32_ibilinear_chw_ukernel__neon_p4() 92 const float32x2_t valphah = vw.val[0]; in xnn_f32_ibilinear_chw_ukernel__neon_p4() 93 const float32x2_t valphav = vw.val[1]; in xnn_f32_ibilinear_chw_ukernel__neon_p4() [all …]
|
D | neonfma-p8.c | 57 const float32x2_t vtltr0 = vld1_f32(itl0); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 58 const float32x2_t vblbr0 = vld1_f32(ibl0); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 59 const float32x2_t vtltr1 = vld1_f32(itl1); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 60 const float32x2_t vblbr1 = vld1_f32(ibl1); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 61 const float32x2_t vtltr2 = vld1_f32(itl2); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 62 const float32x2_t vblbr2 = vld1_f32(ibl2); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 63 const float32x2_t vtltr3 = vld1_f32(itl3); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 64 const float32x2_t vblbr3 = vld1_f32(ibl3); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 65 const float32x2_t vtltr4 = vld1_f32(itl4); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 66 const float32x2_t vblbr4 = vld1_f32(ibl4); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() [all …]
|
D | neon-p8.c | 57 const float32x2_t vtltr0 = vld1_f32(itl0); in xnn_f32_ibilinear_chw_ukernel__neon_p8() 58 const float32x2_t vblbr0 = vld1_f32(ibl0); in xnn_f32_ibilinear_chw_ukernel__neon_p8() 59 const float32x2_t vtltr1 = vld1_f32(itl1); in xnn_f32_ibilinear_chw_ukernel__neon_p8() 60 const float32x2_t vblbr1 = vld1_f32(ibl1); in xnn_f32_ibilinear_chw_ukernel__neon_p8() 61 const float32x2_t vtltr2 = vld1_f32(itl2); in xnn_f32_ibilinear_chw_ukernel__neon_p8() 62 const float32x2_t vblbr2 = vld1_f32(ibl2); in xnn_f32_ibilinear_chw_ukernel__neon_p8() 63 const float32x2_t vtltr3 = vld1_f32(itl3); in xnn_f32_ibilinear_chw_ukernel__neon_p8() 64 const float32x2_t vblbr3 = vld1_f32(ibl3); in xnn_f32_ibilinear_chw_ukernel__neon_p8() 65 const float32x2_t vtltr4 = vld1_f32(itl4); in xnn_f32_ibilinear_chw_ukernel__neon_p8() 66 const float32x2_t vblbr4 = vld1_f32(ibl4); in xnn_f32_ibilinear_chw_ukernel__neon_p8() [all …]
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x2-minmax-neonfma-lane-ld64.c | 61 float32x2_t vacc0x01 = vld1_f32(w); w += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() 62 float32x2_t vacc1x01 = vacc0x01; in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() 63 float32x2_t vacc2x01 = vacc0x01; in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() 64 float32x2_t vacc3x01 = vacc0x01; in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() 68 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() 69 const float32x2_t va1 = vld1_f32(a1); a1 += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() 70 const float32x2_t va2 = vld1_f32(a2); a2 += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() 71 const float32x2_t va3 = vld1_f32(a3); a3 += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() 73 const float32x2_t vb01c0 = vld1_f32(w); w += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() 81 const float32x2_t va0c0 = vdup_lane_f32(va0, 0); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() [all …]
|
D | 4x2-minmax-neon-lane-ld64.c | 61 float32x2_t vacc0x01 = vld1_f32(w); w += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() 62 float32x2_t vacc1x01 = vacc0x01; in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() 63 float32x2_t vacc2x01 = vacc0x01; in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() 64 float32x2_t vacc3x01 = vacc0x01; in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() 68 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() 69 const float32x2_t va1 = vld1_f32(a1); a1 += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() 70 const float32x2_t va2 = vld1_f32(a2); a2 += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() 71 const float32x2_t va3 = vld1_f32(a3); a3 += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() 73 const float32x2_t vb01c0 = vld1_f32(w); w += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() 79 const float32x2_t vb01c1 = vld1_f32(w); w += 2; in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() [all …]
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 4x2-minmax-neonfma-lane-ld64.c | 59 float32x2_t vacc0x01 = vld1_f32(w); w += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() 60 float32x2_t vacc1x01 = vacc0x01; in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() 61 float32x2_t vacc2x01 = vacc0x01; in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() 62 float32x2_t vacc3x01 = vacc0x01; in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() 90 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() 91 const float32x2_t va1 = vld1_f32(a1); a1 += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() 92 const float32x2_t va2 = vld1_f32(a2); a2 += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() 93 const float32x2_t va3 = vld1_f32(a3); a3 += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() 95 const float32x2_t vb01c0 = vld1_f32(w); w += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() 103 const float32x2_t va0c0 = vdup_lane_f32(va0, 0); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() [all …]
|
D | 4x2-minmax-neon-lane-ld64.c | 59 float32x2_t vacc0x01 = vld1_f32(w); w += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() 60 float32x2_t vacc1x01 = vacc0x01; in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() 61 float32x2_t vacc2x01 = vacc0x01; in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() 62 float32x2_t vacc3x01 = vacc0x01; in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() 90 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() 91 const float32x2_t va1 = vld1_f32(a1); a1 += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() 92 const float32x2_t va2 = vld1_f32(a2); a2 += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() 93 const float32x2_t va3 = vld1_f32(a3); a3 += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() 95 const float32x2_t vb01c0 = vld1_f32(w); w += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() 101 const float32x2_t vb01c1 = vld1_f32(w); w += 2; in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() [all …]
|
/external/llvm-project/clang/test/CodeGenCXX/ |
D | ubsan-new-checks.cpp | 21 typedef __attribute__((ext_vector_type(2), aligned(32))) float float32x2_t; typedef 24 float32x2_t x; 57 float32x2_t *func_04() { in func_04() 62 return new float32x2_t; in func_04() 65 float32x2_t *func_05() { in func_05() 71 return new float32x2_t[20]; in func_05() 113 float32x2_t *func_12() { in func_12() 118 return new float32x2_t; in func_12() 121 float32x2_t *func_13() { in func_13() 127 return new float32x2_t[20]; in func_13()
|
/external/XNNPACK/src/f32-spmm/gen/ |
D | 4x4-minmax-neonfma.c | 119 float32x2_t vacc01n0 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 120 float32x2_t vacc01n1 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 121 float32x2_t vacc01n2 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 122 float32x2_t vacc01n3 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 126 const float32x2_t vi01 = vld1_f32(input); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 136 float32x2_t vout01n0 = vmin_f32(vacc01n0, vget_low_f32(vmax)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 137 float32x2_t vout01n1 = vmin_f32(vacc01n1, vget_low_f32(vmax)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 138 float32x2_t vout01n2 = vmin_f32(vacc01n2, vget_low_f32(vmax)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 139 float32x2_t vout01n3 = vmin_f32(vacc01n3, vget_low_f32(vmax)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 161 float32x2_t vacc01 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() [all …]
|
D | 4x2-minmax-neonfma.c | 50 const float32x2_t vw = vld1_f32(w); w += 2; in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 107 float32x2_t vacc01n0 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 108 float32x2_t vacc01n1 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 112 const float32x2_t vi01 = vld1_f32(input); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 114 const float32x2_t vw = vld1_f32(w); w += 2; in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 120 float32x2_t vout01n0 = vmin_f32(vacc01n0, vget_low_f32(vmax)); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 121 float32x2_t vout01n1 = vmin_f32(vacc01n1, vget_low_f32(vmax)); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 137 float32x2_t vacc01 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 141 const float32x2_t vi01 = vld1_f32(input); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 143 const float32x2_t vw = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() [all …]
|
D | 8x2-minmax-neonfma.c | 53 const float32x2_t vw = vld1_f32(w); w += 2; in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 131 const float32x2_t vw = vld1_f32(w); w += 2; in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 184 float32x2_t vacc01n0 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 185 float32x2_t vacc01n1 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 189 const float32x2_t vi01 = vld1_f32(input); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 191 const float32x2_t vw = vld1_f32(w); w += 2; in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 197 float32x2_t vout01n0 = vmin_f32(vacc01n0, vget_low_f32(vmax)); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 198 float32x2_t vout01n1 = vmin_f32(vacc01n1, vget_low_f32(vmax)); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 214 float32x2_t vacc01 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 218 const float32x2_t vi01 = vld1_f32(input); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() [all …]
|
D | 8x4-minmax-neonfma.c | 218 float32x2_t vacc01n0 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 219 float32x2_t vacc01n1 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 220 float32x2_t vacc01n2 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 221 float32x2_t vacc01n3 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 225 const float32x2_t vi01 = vld1_f32(input); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 235 float32x2_t vout01n0 = vmin_f32(vacc01n0, vget_low_f32(vmax)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 236 float32x2_t vout01n1 = vmin_f32(vacc01n1, vget_low_f32(vmax)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 237 float32x2_t vout01n2 = vmin_f32(vacc01n2, vget_low_f32(vmax)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 238 float32x2_t vout01n3 = vmin_f32(vacc01n3, vget_low_f32(vmax)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 260 float32x2_t vacc01 = vld1_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() [all …]
|