/external/llvm-project/clang/test/CodeGen/ |
D | aarch64-v8.2a-neon-intrinsics.c | 20 float16x8_t test_vabsq_f16(float16x8_t a) { in test_vabsq_f16() 36 uint16x8_t test_vceqzq_f16(float16x8_t a) { in test_vceqzq_f16() 52 uint16x8_t test_vcgezq_f16(float16x8_t a) { in test_vcgezq_f16() 68 uint16x8_t test_vcgtzq_f16(float16x8_t a) { in test_vcgtzq_f16() 84 uint16x8_t test_vclezq_f16(float16x8_t a) { in test_vclezq_f16() 100 uint16x8_t test_vcltzq_f16(float16x8_t a) { in test_vcltzq_f16() 114 float16x8_t test_vcvtq_f16_s16 (int16x8_t a) { in test_vcvtq_f16_s16() 128 float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) { in test_vcvtq_f16_u16() 142 int16x8_t test_vcvtq_s16_f16 (float16x8_t a) { in test_vcvtq_s16_f16() 156 uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) { in test_vcvtq_u16_f16() [all …]
|
D | arm-v8.2a-neon-intrinsics.c | 20 float16x8_t test_vabsq_f16(float16x8_t a) { in test_vabsq_f16() 36 uint16x8_t test_vceqzq_f16(float16x8_t a) { in test_vceqzq_f16() 52 uint16x8_t test_vcgezq_f16(float16x8_t a) { in test_vcgezq_f16() 68 uint16x8_t test_vcgtzq_f16(float16x8_t a) { in test_vcgtzq_f16() 84 uint16x8_t test_vclezq_f16(float16x8_t a) { in test_vclezq_f16() 100 uint16x8_t test_vcltzq_f16(float16x8_t a) { in test_vcltzq_f16() 114 float16x8_t test_vcvtq_f16_s16 (int16x8_t a) { in test_vcvtq_f16_s16() 128 float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) { in test_vcvtq_f16_u16() 142 int16x8_t test_vcvtq_s16_f16 (float16x8_t a) { in test_vcvtq_s16_f16() 156 int16x8_t test_vcvtq_u16_f16 (float16x8_t a) { in test_vcvtq_u16_f16() [all …]
|
D | aarch64-v8.2a-neon-intrinsics-constrained.c | 48 float16x8_t test_vsqrtq_f16(float16x8_t a) { in test_vsqrtq_f16() 66 float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) { in test_vfmaq_f16() 86 float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) { in test_vfmsq_f16() 118 float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) { in test_vfmaq_lane_f16() 134 float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) { in test_vfma_laneq_f16() 150 float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) { in test_vfmaq_laneq_f16() 180 float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) { in test_vfmaq_n_f16() 200 float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) { in test_vfmah_laneq_f16() 234 float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) { in test_vfmsq_lane_f16() 252 float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) { in test_vfms_laneq_f16() [all …]
|
D | aarch64-neon-fp16fml.c | 69 float32x4_t test_vfmlalq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) { in test_vfmlalq_low_f16() 81 float32x4_t test_vfmlslq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) { in test_vfmlslq_low_f16() 93 float32x4_t test_vfmlalq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) { in test_vfmlalq_high_f16() 105 float32x4_t test_vfmlslq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) { in test_vfmlslq_high_f16() 303 float32x4_t test_vfmlalq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t c) { in test_vfmlalq_lane_low_f16() 395 float32x4_t test_vfmlalq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t c) { in test_vfmlalq_lane_high_f16() 447 float32x2_t test_vfmlal_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t c) { in test_vfmlal_laneq_low_f16() 499 float32x2_t test_vfmlal_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t c) { in test_vfmlal_laneq_high_f16() 591 float32x4_t test_vfmlalq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) { in test_vfmlalq_laneq_low_f16() 683 float32x4_t test_vfmlalq_laneq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) { in test_vfmlalq_laneq_high_f16() [all …]
|
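The clang CodeGen tests listed above exercise the Armv8.2-A half-precision vector intrinsics from <arm_neon.h> (plain, constrained-FP, and FP16 FML forms). A minimal usage sketch of the same intrinsics follows; the helper names are illustrative assumptions, while the intrinsic signatures are taken directly from the test declarations above.

    #include <arm_neon.h>

    #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
    /* Absolute value, then compare-against-zero, mirroring test_vabsq_f16()
     * and test_vceqzq_f16() above. */
    uint16x8_t abs_is_zero_f16(float16x8_t a) {
      float16x8_t abs_a = vabsq_f16(a);   /* |a| lane-wise               */
      return vceqzq_f16(abs_a);           /* 0xFFFF where |a| == 0.0     */
    }

    /* Fused multiply-add, mirroring test_vfmaq_f16(): a + b * c. */
    float16x8_t fma_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
      return vfmaq_f16(a, b, c);
    }
    #endif

    #if defined(__ARM_FEATURE_FP16_FML)
    /* FP16 multiply-long: widen the low halves of b and c to f32 and
     * accumulate into a, as in test_vfmlalq_low_f16(). */
    float32x4_t fmlal_low(float32x4_t a, float16x8_t b, float16x8_t c) {
      return vfmlalq_low_f16(a, b, c);
    }
    #endif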
/external/XNNPACK/src/f16-dwconv/gen/ |
D | up16x25-minmax-neonfp16arith-acc2.c | 33 const float16x8_t vmax = vld1q_dup_f16(&params->max); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 34 const float16x8_t vmin = vld1q_dup_f16(&params->min); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 167 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 168 float16x8_t vacc89ABCDEFp0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 171 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 172 const float16x8_t vi0x89ABCDEF = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 173 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 174 const float16x8_t vk0x89ABCDEF = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 178 const float16x8_t vi1x01234567 = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 179 const float16x8_t vi1x89ABCDEF = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() [all …]
|
D | up16x25-minmax-neonfp16arith.c | 33 const float16x8_t vmax = vld1q_dup_f16(&params->max); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 34 const float16x8_t vmin = vld1q_dup_f16(&params->min); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 167 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 168 float16x8_t vacc89ABCDEFp0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 171 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 172 const float16x8_t vi0x89ABCDEF = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 173 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 174 const float16x8_t vk0x89ABCDEF = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 178 const float16x8_t vi1x01234567 = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 179 const float16x8_t vi1x89ABCDEF = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() [all …]
|
D | up16x9-minmax-neonfp16arith-acc2.c | 33 const float16x8_t vmax = vld1q_dup_f16(&params->max); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 34 const float16x8_t vmin = vld1q_dup_f16(&params->min); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 87 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 88 float16x8_t vacc89ABCDEFp0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 91 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 92 const float16x8_t vi0x89ABCDEF = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 93 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 94 const float16x8_t vk0x89ABCDEF = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 98 const float16x8_t vi1x01234567 = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 99 const float16x8_t vi1x89ABCDEF = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() [all …]
|
D | up16x9-minmax-neonfp16arith.c | 33 const float16x8_t vmax = vld1q_dup_f16(&params->max); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 34 const float16x8_t vmin = vld1q_dup_f16(&params->min); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 87 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 88 float16x8_t vacc89ABCDEFp0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 91 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 92 const float16x8_t vi0x89ABCDEF = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 93 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 94 const float16x8_t vk0x89ABCDEF = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 98 const float16x8_t vi1x01234567 = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 99 const float16x8_t vi1x89ABCDEF = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() [all …]
|
D | up8x25-minmax-neonfp16arith-acc2.c | 33 const float16x8_t vmax = vld1q_dup_f16(&params->max); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 34 const float16x8_t vmin = vld1q_dup_f16(&params->min); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 167 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 170 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 171 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 174 const float16x8_t vi1x01234567 = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 175 const float16x8_t vk1x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 176 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 178 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 179 const float16x8_t vk2x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() [all …]
|
D | up8x25-minmax-neonfp16arith.c | 33 const float16x8_t vmax = vld1q_dup_f16(&params->max); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() 34 const float16x8_t vmin = vld1q_dup_f16(&params->min); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() 167 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() 170 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() 171 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() 174 const float16x8_t vi1x01234567 = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() 175 const float16x8_t vk1x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() 178 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() 179 const float16x8_t vk2x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() 182 const float16x8_t vi3x01234567 = vld1q_f16(i3); i3 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() [all …]
|
D | up16x4-minmax-neonfp16arith-acc2.c | 33 const float16x8_t vmax = vld1q_dup_f16(&params->max); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 34 const float16x8_t vmin = vld1q_dup_f16(&params->min); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 62 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 63 float16x8_t vacc89ABCDEFp0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 66 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 67 const float16x8_t vi0x89ABCDEF = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 68 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 69 const float16x8_t vk0x89ABCDEF = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 73 const float16x8_t vi1x01234567 = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 74 const float16x8_t vi1x89ABCDEF = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() [all …]
|
D | up16x4-minmax-neonfp16arith.c | 33 const float16x8_t vmax = vld1q_dup_f16(&params->max); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 34 const float16x8_t vmin = vld1q_dup_f16(&params->min); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 62 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 63 float16x8_t vacc89ABCDEFp0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 66 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 67 const float16x8_t vi0x89ABCDEF = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 68 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 69 const float16x8_t vk0x89ABCDEF = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 73 const float16x8_t vi1x01234567 = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 74 const float16x8_t vi1x89ABCDEF = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() [all …]
|
D | up8x9-minmax-neonfp16arith-acc2.c | 33 const float16x8_t vmax = vld1q_dup_f16(&params->max); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 34 const float16x8_t vmin = vld1q_dup_f16(&params->min); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 87 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 90 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 91 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 94 const float16x8_t vi1x01234567 = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 95 const float16x8_t vk1x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 96 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 98 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 99 const float16x8_t vk2x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() [all …]
|
D | up8x9-minmax-neonfp16arith.c | 33 const float16x8_t vmax = vld1q_dup_f16(&params->max); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() 34 const float16x8_t vmin = vld1q_dup_f16(&params->min); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() 87 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() 90 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() 91 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() 94 const float16x8_t vi1x01234567 = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() 95 const float16x8_t vk1x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() 98 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() 99 const float16x8_t vk2x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() 102 const float16x8_t vi3x01234567 = vld1q_f16(i3); i3 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() [all …]
|
D | up8x4-minmax-neonfp16arith-acc2.c | 33 const float16x8_t vmax = vld1q_dup_f16(&params->max); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 34 const float16x8_t vmin = vld1q_dup_f16(&params->min); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 62 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 65 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 66 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 69 const float16x8_t vi1x01234567 = vld1q_f16(i1); i1 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 70 const float16x8_t vk1x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 71 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 73 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 74 const float16x8_t vk2x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() [all …]
|
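The f16-dwconv kernels listed above share one shape: broadcast the clamp bounds from the params struct, load a bias vector from the packed weights w, then for each kernel tap load eight inputs and eight weights and multiply-accumulate, finally clamping and storing. The sketch below is a simplified illustration of that per-8-channel step, not the XNNPACK function itself: the function name and flat argument list are assumptions, the clamp bounds are broadcast with vdupq_n_f16 instead of vld1q_dup_f16 on the params struct, the multiply-accumulate (truncated out of the excerpts) is assumed to be vfmaq_f16, and the real kernels additionally unroll over 16 channels, split work across two accumulators in the _acc2 variants, and handle channel remainders.

    #include <arm_neon.h>
    #include <stddef.h>

    /* Illustrative per-8-channel depthwise-convolution step (hypothetical
     * helper; requires ARMv8.2-A FP16 arithmetic, e.g. -march=armv8.2-a+fp16). */
    void dwconv_f16_8ch_sketch(
        const float16_t** inputs,   /* one input pointer per kernel tap       */
        const float16_t* w,         /* packed weights: 8 biases, 8 per tap    */
        float16_t* output,
        size_t kernel_size,
        float16_t min_value, float16_t max_value)
    {
      const float16x8_t vmin = vdupq_n_f16(min_value);
      const float16x8_t vmax = vdupq_n_f16(max_value);

      float16x8_t vacc = vld1q_f16(w); w += 8;            /* bias             */
      for (size_t k = 0; k < kernel_size; k++) {
        const float16x8_t vi = vld1q_f16(inputs[k]);      /* 8 input channels */
        const float16x8_t vk = vld1q_f16(w); w += 8;      /* 8 tap weights    */
        vacc = vfmaq_f16(vacc, vi, vk);                   /* accumulate       */
      }

      vacc = vmaxq_f16(vacc, vmin);                       /* clamp to [min,   */
      vacc = vminq_f16(vacc, vmax);                       /*  max]            */
      vst1q_f16(output, vacc);
    }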
/external/XNNPACK/src/f16-gavgpool/ |
D | 7p7x-minmax-neonfp16arith-c8.c | 40 const float16x8_t vi0 = vld1q_f16(i0); i0 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 41 const float16x8_t vi1 = vld1q_f16(i1); i1 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 42 const float16x8_t vi2 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 43 const float16x8_t vi3 = vld1q_f16(i3); i3 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 44 const float16x8_t vi4 = vld1q_f16(i4); i4 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 45 const float16x8_t vi5 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 46 const float16x8_t vi6 = vld1q_f16(i6); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 48 const float16x8_t vsum01 = vaddq_f16(vi0, vi1); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 49 const float16x8_t vsum23 = vaddq_f16(vi2, vi3); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 50 const float16x8_t vsum45 = vaddq_f16(vi4, vi5); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() [all …]
|
D | 7x-minmax-neonfp16arith-c8.c | 52 const float16x8_t vscale = vld1q_dup_f16(&params->scale); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() 53 const float16x8_t vmin = vld1q_dup_f16(&params->min); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() 54 const float16x8_t vmax = vld1q_dup_f16(&params->max); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() 57 const float16x8_t vi0 = vld1q_f16(i0); i0 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() 58 const float16x8_t vi1 = vld1q_f16(i1); i1 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() 59 const float16x8_t vi2 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() 60 const float16x8_t vi3 = vld1q_f16(i3); i3 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() 61 const float16x8_t vi4 = vld1q_f16(i4); i4 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() 62 const float16x8_t vi5 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() 63 const float16x8_t vi6 = vld1q_f16(i6); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() [all …]
|
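The global average-pooling kernels above load up to seven input rows per pass, reduce them with pairwise vaddq_f16 additions, then scale and clamp the sum. A reduced sketch of the single-pass 7-row, 8-channel step follows; the function name is hypothetical, the scale/min/max are passed as scalars rather than read from a params struct, and the multi-pass buffering of the 7p7x variant is omitted.

    #include <arm_neon.h>

    /* Sketch of a 7-row average over 8 channels (cf. 7x-minmax-neonfp16arith-c8
     * above); scale is expected to hold 1/rows so the sum becomes a mean. */
    float16x8_t gavgpool_7rows_sketch(
        const float16_t* i0, const float16_t* i1, const float16_t* i2,
        const float16_t* i3, const float16_t* i4, const float16_t* i5,
        const float16_t* i6,
        float16_t scale, float16_t min_value, float16_t max_value)
    {
      const float16x8_t vsum01 = vaddq_f16(vld1q_f16(i0), vld1q_f16(i1));
      const float16x8_t vsum23 = vaddq_f16(vld1q_f16(i2), vld1q_f16(i3));
      const float16x8_t vsum45 = vaddq_f16(vld1q_f16(i4), vld1q_f16(i5));
      const float16x8_t vsum016 = vaddq_f16(vsum01, vld1q_f16(i6));
      const float16x8_t vsum = vaddq_f16(vaddq_f16(vsum016, vsum23), vsum45);

      float16x8_t vout = vmulq_n_f16(vsum, scale);        /* average */
      vout = vmaxq_f16(vout, vdupq_n_f16(min_value));     /* clamp   */
      return vminq_f16(vout, vdupq_n_f16(max_value));
    }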
/external/llvm-project/clang/test/CodeGen/arm-mve-intrinsics/ |
D | vcmlaq.c | 12 float16x8_t test_vcmlaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) in test_vcmlaq_f16() 40 float16x8_t test_vcmlaq_rot90_f16(float16x8_t a, float16x8_t b, float16x8_t c) in test_vcmlaq_rot90_f16() 68 float16x8_t test_vcmlaq_rot180_f16(float16x8_t a, float16x8_t b, float16x8_t c) in test_vcmlaq_rot180_f16() 96 float16x8_t test_vcmlaq_rot270_f16(float16x8_t a, float16x8_t b, float16x8_t c) in test_vcmlaq_rot270_f16() 126 float16x8_t test_vcmlaq_m_f16(float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) in test_vcmlaq_m_f16() 158 float16x8_t test_vcmlaq_rot90_m_f16(float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) in test_vcmlaq_rot90_m_f16() 190 float16x8_t test_vcmlaq_rot180_m_f16(float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) in test_vcmlaq_rot180_m_f16() 222 float16x8_t test_vcmlaq_rot270_m_f16(float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) in test_vcmlaq_rot270_m_f16()
|
D | vcmulq.c | 12 float16x8_t test_vcmulq_f16(float16x8_t a, float16x8_t b) in test_vcmulq_f16() 40 float16x8_t test_vcmulq_rot90_f16(float16x8_t a, float16x8_t b) in test_vcmulq_rot90_f16() 68 float16x8_t test_vcmulq_rot180_f16(float16x8_t a, float16x8_t b) in test_vcmulq_rot180_f16() 96 float16x8_t test_vcmulq_rot270_f16(float16x8_t a, float16x8_t b) in test_vcmulq_rot270_f16() 126 float16x8_t test_vcmulq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) in test_vcmulq_m_f16() 158 float16x8_t test_vcmulq_rot90_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_… in test_vcmulq_rot90_m_f16() 190 float16x8_t test_vcmulq_rot180_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16… in test_vcmulq_rot180_m_f16() 222 float16x8_t test_vcmulq_rot270_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16… in test_vcmulq_rot270_m_f16() 254 float16x8_t test_vcmulq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) in test_vcmulq_x_f16() 286 float16x8_t test_vcmulq_rot90_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) in test_vcmulq_rot90_x_f16() [all …]
|
D | vrnd.c | 12 float16x8_t test_vrndaq_f16(float16x8_t a) in test_vrndaq_f16() 40 float16x8_t test_vrndmq_f16(float16x8_t a) in test_vrndmq_f16() 68 float16x8_t test_vrndpq_f16(float16x8_t a) in test_vrndpq_f16() 96 float16x8_t test_vrndq_f16(float16x8_t a) in test_vrndq_f16() 124 float16x8_t test_vrndxq_f16(float16x8_t a) in test_vrndxq_f16() 152 float16x8_t test_vrndnq_f16(float16x8_t a) in test_vrndnq_f16() 182 float16x8_t test_vrndaq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) in test_vrndaq_m_f16() 214 float16x8_t test_vrndmq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) in test_vrndmq_m_f16() 246 float16x8_t test_vrndnq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) in test_vrndnq_m_f16() 278 float16x8_t test_vrndpq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) in test_vrndpq_m_f16() [all …]
|
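These MVE (Helium) tests cover the complex multiply and multiply-accumulate intrinsics (vcmulq, vcmlaq, plus their rotated forms) and the rounding intrinsics (vrnd*) from <arm_mve.h>, each in plain, predicated (_m), and, where listed, zeroing (_x) variants. A small usage sketch follows; the helper name and the tail-predication pattern built with vctp16q are illustrative assumptions, while the intrinsic signatures match the test declarations above.

    #include <arm_mve.h>

    /* Complex multiply-accumulate on interleaved (re, im) f16 pairs, with a
     * final partial vector handled via tail predication (cf. test_vcmlaq_f16()
     * and test_vcmlaq_m_f16() above). */
    float16x8_t cmla_step(float16x8_t acc, float16x8_t a, float16x8_t b,
                          unsigned remaining_lanes)
    {
      if (remaining_lanes >= 8) {
        acc = vcmlaq_f16(acc, a, b);              /* rot0 half of the complex MAC  */
        acc = vcmlaq_rot90_f16(acc, a, b);        /* rot90 half completes acc+=a*b */
      } else {
        const mve_pred16_t p = vctp16q(remaining_lanes);
        acc = vcmlaq_m_f16(acc, a, b, p);         /* only predicated lanes update  */
        acc = vcmlaq_rot90_m_f16(acc, a, b, p);
      }
      return vrndnq_f16(acc);                     /* round to nearest, ties even   */
    }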
/external/XNNPACK/src/f16-gemm/gen/ |
D | 1x16-minmax-neonfp16arith-ld64.c | 45 …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t)); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() 46 …float16x8_t vacc0x89ABCDEF = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t)); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() 52 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() 53 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() 59 const float16x8_t va0c0 = vdupq_lane_f16(va0, 0); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() 64 …const float16x8_t vb01234567c1 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() 65 …const float16x8_t vb89ABCDEFc1 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() 71 const float16x8_t va0c1 = vdupq_lane_f16(va0, 1); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() 76 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() 77 …const float16x8_t vb89ABCDEFc2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f16-gemm/gen-inc/ |
D | 1x16inc-minmax-neonfp16arith-ld64.c | 47 …float16x8_t vacc0x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() 48 …float16x8_t vacc0x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() 54 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() 55 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() 61 const float16x8_t va0c0 = vdupq_lane_f16(va0, 0); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() 66 …const float16x8_t vb01234567c1 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() 67 …const float16x8_t vb89ABCDEFc1 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() 73 const float16x8_t va0c1 = vdupq_lane_f16(va0, 1); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() 78 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() 79 …const float16x8_t vb89ABCDEFc2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() [all …]
|
D | 8x8inc-minmax-neonfp16arith-ld64.c | 89 …float16x8_t vacc0x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 90 …float16x8_t vacc1x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 91 …float16x8_t vacc2x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 92 …float16x8_t vacc3x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 93 …float16x8_t vacc4x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 94 …float16x8_t vacc5x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 95 …float16x8_t vacc6x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 96 …float16x8_t vacc7x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 109 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 121 const float16x8_t va0c0 = vdupq_lane_f16(va0, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() [all …]
|
D | 4x16inc-minmax-neonfp16arith-ld64.c | 65 …float16x8_t vacc0x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 66 …float16x8_t vacc0x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 67 …float16x8_t vacc1x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 68 …float16x8_t vacc1x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 69 …float16x8_t vacc2x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 70 …float16x8_t vacc2x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 71 …float16x8_t vacc3x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 72 …float16x8_t vacc3x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 81 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 82 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f16-igemm/gen/ |
D | 1x16-minmax-neonfp16arith-ld64.c | 47 …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t)); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() 48 …float16x8_t vacc0x89ABCDEF = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t)); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() 63 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() 64 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() 70 const float16x8_t va0c0 = vdupq_lane_f16(va0, 0); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() 75 …const float16x8_t vb01234567c1 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() 76 …const float16x8_t vb89ABCDEFc1 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() 82 const float16x8_t va0c1 = vdupq_lane_f16(va0, 1); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() 87 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() 88 …const float16x8_t vb89ABCDEFc2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() [all …]
|
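The f16 GEMM, GEMM-inc, and IGEMM micro-kernels in the last three groups share one inner-loop idiom: load a row of float16x8_t accumulators from the packed weights (or, for gemminc, from acc), read four A elements at a time with vld1_f16 (the "ld64" in the file names), broadcast each lane with vdupq_lane_f16, and multiply-accumulate against freshly loaded B vectors. The trimmed sketch below illustrates a 1-row, 8-column step; the function name, the reduction to 8 columns, the element-wise pointer arithmetic on w, and the omitted min/max clamp are simplifications assumed for illustration.

    #include <arm_neon.h>
    #include <stddef.h>

    /* Sketch of the "ld64" inner loop of a 1x8 f16 GEMM micro-kernel
     * (cf. 1x16-minmax-neonfp16arith-ld64.c above, reduced to 8 columns). */
    void gemm_1x8_f16_sketch(size_t kc,            /* number of K elements       */
                             const float16_t* a0,  /* one row of A               */
                             const float16_t* w,   /* packed B: bias, 8 per K    */
                             float16_t* c0)
    {
      float16x8_t vacc0 = vld1q_f16(w); w += 8;    /* bias / initial accumulator */

      size_t k = kc;
      for (; k >= 4; k -= 4) {
        const float16x4_t va0 = vld1_f16(a0); a0 += 4;   /* 4 A values ("ld64")  */

        const float16x8_t vb0 = vld1q_f16(w); w += 8;
        vacc0 = vfmaq_f16(vacc0, vb0, vdupq_lane_f16(va0, 0));
        const float16x8_t vb1 = vld1q_f16(w); w += 8;
        vacc0 = vfmaq_f16(vacc0, vb1, vdupq_lane_f16(va0, 1));
        const float16x8_t vb2 = vld1q_f16(w); w += 8;
        vacc0 = vfmaq_f16(vacc0, vb2, vdupq_lane_f16(va0, 2));
        const float16x8_t vb3 = vld1q_f16(w); w += 8;
        vacc0 = vfmaq_f16(vacc0, vb3, vdupq_lane_f16(va0, 3));
      }
      for (; k != 0; k--) {                        /* K remainder                */
        const float16x8_t vb = vld1q_f16(w); w += 8;
        vacc0 = vfmaq_f16(vacc0, vb, vdupq_n_f16(*a0)); a0 += 1;
      }

      vst1q_f16(c0, vacc0);                        /* min/max clamp omitted      */
    }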