/external/gemmlowp/fixedpoint/ |
D | fixedpoint_neon.h | 26 struct FixedPointRawTypeTraits<int32x4_t> { 38 inline int32x4_t BitAnd(int32x4_t a, int32x4_t b) { 48 inline int32x4_t BitOr(int32x4_t a, int32x4_t b) { 58 inline int32x4_t BitXor(int32x4_t a, int32x4_t b) { 68 inline int32x4_t BitNot(int32x4_t a) { 78 inline int32x4_t Add(int32x4_t a, int32x4_t b) { 88 inline int32x4_t Sub(int32x4_t a, int32x4_t b) { 98 inline int32x4_t Neg(int32x4_t a) { 108 inline int32x4_t ShiftLeft(int32x4_t a, int offset) { 118 inline int32x4_t ShiftLeft(int32x4_t a, int32x4_t offset) { [all …]
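The entries above are gemmlowp's specializations of its generic fixed-point primitives for the NEON int32x4_t raw type. A minimal sketch of the same idea, not the gemmlowp source and with hypothetical helper names, where each wrapper maps onto a single NEON intrinsic:

    #include <arm_neon.h>

    static inline int32x4_t my_bit_and(int32x4_t a, int32x4_t b) { return vandq_s32(a, b); }
    static inline int32x4_t my_bit_or(int32x4_t a, int32x4_t b)  { return vorrq_s32(a, b); }
    static inline int32x4_t my_bit_not(int32x4_t a)              { return vmvnq_s32(a); }
    static inline int32x4_t my_add(int32x4_t a, int32x4_t b)     { return vaddq_s32(a, b); }
    static inline int32x4_t my_neg(int32x4_t a)                  { return vnegq_s32(a); }
    static inline int32x4_t my_shift_left(int32x4_t a, int offset) {
        /* vshlq_s32 shifts each lane by the matching lane of its second operand,
           so a scalar shift amount is broadcast first. */
        return vshlq_s32(a, vdupq_n_s32(offset));
    }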
|
/external/clang/test/CodeGen/ |
D | aarch64-neon-2velem.c | 39 int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlaq_lane_s32() 66 int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmla_laneq_s32() 75 int32x4_t test_vmlaq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { in test_vmlaq_laneq_s32() 111 int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlsq_lane_s32() 138 int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmls_laneq_s32() 147 int32x4_t test_vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { in test_vmlsq_laneq_s32() 179 int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) { in test_vmulq_lane_s32() 235 int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) { in test_vmul_laneq_s32() 243 int32x4_t test_vmulq_laneq_s32(int32x4_t a, int32x4_t v) { in test_vmulq_laneq_s32() 505 int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { in test_vmlal_lane_s16() [all …]
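These tests exercise the by-lane multiply-accumulate forms. As an assumed illustration of the semantics rather than anything taken from the test file: the lane index selects one element of the short vector v, which scales every lane of b before the accumulate.

    #include <arm_neon.h>

    /* a[i] + b[i] * v[1] for i = 0..3; the lane index must be a compile-time constant. */
    int32x4_t mla_by_lane1(int32x4_t a, int32x4_t b, int32x2_t v) {
        return vmlaq_lane_s32(a, b, v, 1);
    }

    /* Widening form from the same family: a[i] + (int32_t)b[i] * (int32_t)v[3]. */
    int32x4_t mlal_by_lane3(int32x4_t a, int16x4_t b, int16x4_t v) {
        return vmlal_lane_s16(a, b, v, 3);
    }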
|
D | arm-v8.1a-neon-intrinsics.c | 44 int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { in test_vqrdmlahq_s32() 90 int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { in test_vqrdmlahq_lane_s32() 132 int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { in test_vqrdmlshq_s32() 178 int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { in test_vqrdmlshq_lane_s32()
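vqrdmlah/vqrdmlsh are the ARMv8.1-A fused rounding doubling multiply-accumulate intrinsics. A hedged sketch of the quad-word forms, assuming a compiler and target with the QRDMLAH extension (for example -march=armv8.1-a):

    #include <arm_neon.h>

    /* Adds the rounded high half of 2*b*c to a, with saturation, in one instruction,
       where the pre-v8.1-A pattern would pair vqrdmulhq_s32 with vqaddq_s32. */
    int32x4_t rounding_doubling_mla(int32x4_t a, int32x4_t b, int32x4_t c) {
        return vqrdmlahq_s32(a, b, c);
    }

    /* Subtracting counterpart, as tested above. */
    int32x4_t rounding_doubling_mls(int32x4_t a, int32x4_t b, int32x4_t c) {
        return vqrdmlshq_s32(a, b, c);
    }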
|
D | aarch64-v8.1a-neon-intrinsics.c | 17 int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vqrdmlah_laneq_s32() 33 int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { in test_vqrdmlahq_laneq_s32() 97 int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) { in test_vqrdmlahs_laneq_s32() 113 int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vqrdmlsh_laneq_s32() 129 int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { in test_vqrdmlshq_laneq_s32() 193 int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) { in test_vqrdmlshs_laneq_s32()
|
D | aarch64-neon-misc.c | 91 uint32x4_t test_vceqzq_s32(int32x4_t a) { in test_vceqzq_s32() 319 uint32x4_t test_vcgezq_s32(int32x4_t a) { in test_vcgezq_s32() 435 uint32x4_t test_vclezq_s32(int32x4_t a) { in test_vclezq_s32() 551 uint32x4_t test_vcgtzq_s32(int32x4_t a) { in test_vcgtzq_s32() 667 uint32x4_t test_vcltzq_s32(int32x4_t a) { in test_vcltzq_s32() 927 int32x4_t test_vrev64q_s32(int32x4_t a) { in test_vrev64q_s32() 1035 int32x4_t test_vpaddlq_s16(int16x8_t a) { in test_vpaddlq_s16() 1044 int64x2_t test_vpaddlq_s32(int32x4_t a) { in test_vpaddlq_s32() 1159 int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) { in test_vpadalq_s16() 1171 int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) { in test_vpadalq_s32() [all …]
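Two of the patterns this file exercises, shown as a small assumed example (A64 intrinsics): the compare-against-zero forms return all-ones or all-zero lane masks as uint32x4_t, and vpaddlq/vpadalq perform widening pairwise additions.

    #include <arm_neon.h>

    /* Lane mask: 0xFFFFFFFF where a[i] > 0, else 0. */
    uint32x4_t positive_mask(int32x4_t a) {
        return vcgtzq_s32(a);
    }

    /* Pairwise widening add: adjacent int16 pairs become four int32 sums, which are
       then accumulated into an int64x2_t. */
    int64x2_t pairwise_accumulate(int64x2_t acc, int16x8_t x) {
        const int32x4_t pairs = vpaddlq_s16(x);
        return vpadalq_s32(acc, pairs);
    }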
|
D | arm_neon_intrinsics.c | 113 int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { in test_vabaq_s32() 176 int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { in test_vabal_s16() 349 int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) { in test_vabdq_s32() 420 int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { in test_vabdl_s16() 536 int32x4_t test_vabsq_s32(int32x4_t a) { in test_vabsq_s32() 630 int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) { in test_vaddq_s32() 699 int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { in test_vaddhn_s32() 774 int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) { in test_vaddl_s16() 841 int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) { in test_vaddw_s16() 957 int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) { in test_vandq_s32() [all …]
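Many of these tests pair a widening operation that produces an int32x4_t with a narrowing one that consumes it. A small assumed illustration of that round trip:

    #include <arm_neon.h>

    /* Widening add: two int16x4_t inputs summed into 32-bit lanes. */
    int32x4_t widen_add(int16x4_t a, int16x4_t b) {
        return vaddl_s16(a, b);
    }

    /* Narrowing add: vaddhn_s32 keeps only the high 16 bits of each 32-bit sum. */
    int16x4_t add_high_narrow(int32x4_t a, int32x4_t b) {
        return vaddhn_s32(a, b);
    }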
|
/external/XNNPACK/src/requantization/ |
D | gemmlowp-neon.c | 57 const int32x4_t vmultiplier = vdupq_n_s32(multiplier); in xnn_requantize_gemmlowp__neon() 59 const int32x4_t vshift = vdupq_n_s32(-shift); in xnn_requantize_gemmlowp__neon() 63 const int32x4_t x = vld1q_s32(input); in xnn_requantize_gemmlowp__neon() 64 const int32x4_t y = vld1q_s32(input + 4); in xnn_requantize_gemmlowp__neon() 65 const int32x4_t z = vld1q_s32(input + 8); in xnn_requantize_gemmlowp__neon() 66 const int32x4_t w = vld1q_s32(input + 12); in xnn_requantize_gemmlowp__neon() 69 const int32x4_t x_product = vqrdmulhq_s32(x, vmultiplier); in xnn_requantize_gemmlowp__neon() 70 const int32x4_t y_product = vqrdmulhq_s32(y, vmultiplier); in xnn_requantize_gemmlowp__neon() 71 const int32x4_t z_product = vqrdmulhq_s32(z, vmultiplier); in xnn_requantize_gemmlowp__neon() 72 const int32x4_t w_product = vqrdmulhq_s32(w, vmultiplier); in xnn_requantize_gemmlowp__neon() [all …]
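The core of this requantization path is a gemmlowp-style fixed-point multiply: vqrdmulhq_s32 against a Q31 multiplier followed by a rounding arithmetic right shift. A condensed sketch of one vector's worth of that arithmetic, with illustrative names and without XNNPACK's rounding fixups:

    #include <arm_neon.h>
    #include <stdint.h>

    int32x4_t requantize_step(int32x4_t x, int32_t multiplier, int shift) {
        const int32x4_t vmultiplier = vdupq_n_s32(multiplier);
        const int32x4_t vshift = vdupq_n_s32(-shift);  /* negative lane value = right shift */
        /* Rounding doubling multiply returning the high 32 bits of the 64-bit product. */
        const int32x4_t product = vqrdmulhq_s32(x, vmultiplier);
        /* Rounding shift right by 'shift' bits. */
        return vrshlq_s32(product, vshift);
    }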
|
D | q31-neon.c | 45 const int32x4_t vmultiplier = vdupq_n_s32(multiplier); in xnn_requantize_q31__neon() 47 const int32x4_t vshift = vdupq_n_s32(-shift); in xnn_requantize_q31__neon() 48 const int32x4_t vshift_eq_0_mask = vreinterpretq_s32_u32(vceqq_s32(vshift, vmovq_n_s32(0))); in xnn_requantize_q31__neon() 52 const int32x4_t x = vld1q_s32(input); in xnn_requantize_q31__neon() 53 const int32x4_t y = vld1q_s32(input + 4); in xnn_requantize_q31__neon() 54 const int32x4_t z = vld1q_s32(input + 8); in xnn_requantize_q31__neon() 55 const int32x4_t w = vld1q_s32(input + 12); in xnn_requantize_q31__neon() 60 const int32x4_t x_product = vqrdmulhq_s32(x, vmultiplier); in xnn_requantize_q31__neon() 61 const int32x4_t y_product = vqrdmulhq_s32(y, vmultiplier); in xnn_requantize_q31__neon() 62 const int32x4_t z_product = vqrdmulhq_s32(z, vmultiplier); in xnn_requantize_q31__neon() [all …]
|
D | fp32-neon.c | 39 const int32x4_t vimagic = vdupq_n_s32(INT32_C(0x4B400000) - (int32_t)(uint32_t) zero_point); in xnn_requantize_fp32__neon() 42 const int32x4_t x = vld1q_s32(input); in xnn_requantize_fp32__neon() 43 const int32x4_t y = vld1q_s32(input + 4); in xnn_requantize_fp32__neon() 44 const int32x4_t z = vld1q_s32(input + 8); in xnn_requantize_fp32__neon() 45 const int32x4_t w = vld1q_s32(input + 12); in xnn_requantize_fp32__neon() 63 const int32x4_t x_rounded = vcvtnq_s32_f32(x_scaled); in xnn_requantize_fp32__neon() 64 const int32x4_t y_rounded = vcvtnq_s32_f32(y_scaled); in xnn_requantize_fp32__neon() 65 const int32x4_t z_rounded = vcvtnq_s32_f32(z_scaled); in xnn_requantize_fp32__neon() 66 const int32x4_t w_rounded = vcvtnq_s32_f32(w_scaled); in xnn_requantize_fp32__neon() 95 …const int32x4_t x_biased = vsubq_s32(vreinterpretq_s32_f32(vaddq_f32(x_clamped, vfmagic)), vimagic… in xnn_requantize_fp32__neon() [all …]
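The 0x4B400000 constant suggests the classic magic-bias float-to-int conversion: 0x4B400000 is the float 2^23 + 2^22 (12582912.0f), and adding it to a float of small magnitude leaves the rounded integer in the low mantissa bits. An assumed illustration of that trick in isolation, without the zero-point folding done above:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Round x to the nearest integer (default FP rounding mode) without a
       float-to-int conversion instruction; valid while |x| stays well below 2^22. */
    int32x4_t magic_round(float32x4_t x) {
        const float32x4_t vfmagic = vdupq_n_f32(12582912.0f);           /* bits 0x4B400000 */
        const int32x4_t vimagic = vdupq_n_s32(INT32_C(0x4B400000));
        return vsubq_s32(vreinterpretq_s32_f32(vaddq_f32(x, vfmagic)), vimagic);
    }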
|
D | precise-neon.c | 39 const int32x4_t vmultiplier = vdupq_n_s32(multiplier); in xnn_requantize_precise__neon() 48 const int32x4_t x = vld1q_s32(input); in xnn_requantize_precise__neon() 49 const int32x4_t y = vld1q_s32(input + 4); in xnn_requantize_precise__neon() 50 const int32x4_t z = vld1q_s32(input + 8); in xnn_requantize_precise__neon() 51 const int32x4_t w = vld1q_s32(input + 12); in xnn_requantize_precise__neon() 109 …const int32x4_t x_scaled = vuzp1q_s32(vreinterpretq_s32_s64(x01_scaled), vreinterpretq_s32_s64(x23… in xnn_requantize_precise__neon() 110 …const int32x4_t y_scaled = vuzp1q_s32(vreinterpretq_s32_s64(y01_scaled), vreinterpretq_s32_s64(y23… in xnn_requantize_precise__neon() 111 …const int32x4_t z_scaled = vuzp1q_s32(vreinterpretq_s32_s64(z01_scaled), vreinterpretq_s32_s64(z23… in xnn_requantize_precise__neon() 112 …const int32x4_t w_scaled = vuzp1q_s32(vreinterpretq_s32_s64(w01_scaled), vreinterpretq_s32_s64(w23… in xnn_requantize_precise__neon() 118 const int32x4_t x_scaled = vcombine_s32(vmovn_s64(x01_scaled), vmovn_s64(x23_scaled)); in xnn_requantize_precise__neon() [all …]
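The two packing variants visible above take the low 32 bits of each 64-bit lane: vuzp1q_s32 on reinterpreted vectors where A64 is available, vmovn_s64 plus vcombine_s32 otherwise. A hedged sketch of that idiom (little-endian lane order assumed):

    #include <arm_neon.h>

    /* Pack the low 32 bits of four int64 lanes (lo holds results 0-1, hi holds 2-3). */
    int32x4_t pack_low32(int64x2_t lo, int64x2_t hi) {
    #if defined(__aarch64__)
        /* vuzp1q_s32 keeps the even-indexed 32-bit lanes, i.e. the low half of each int64. */
        return vuzp1q_s32(vreinterpretq_s32_s64(lo), vreinterpretq_s32_s64(hi));
    #else
        return vcombine_s32(vmovn_s64(lo), vmovn_s64(hi));
    #endif
    }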
|
/external/libopus/silk/arm/ |
D | NSQ_neon.c | 40 int32x4_t coef0 = vld1q_s32(coef32); in silk_noise_shape_quantizer_short_prediction_neon() 41 int32x4_t coef1 = vld1q_s32(coef32 + 4); in silk_noise_shape_quantizer_short_prediction_neon() 42 int32x4_t coef2 = vld1q_s32(coef32 + 8); in silk_noise_shape_quantizer_short_prediction_neon() 43 int32x4_t coef3 = vld1q_s32(coef32 + 12); in silk_noise_shape_quantizer_short_prediction_neon() 45 int32x4_t a0 = vld1q_s32(buf32 - 15); in silk_noise_shape_quantizer_short_prediction_neon() 46 int32x4_t a1 = vld1q_s32(buf32 - 11); in silk_noise_shape_quantizer_short_prediction_neon() 47 int32x4_t a2 = vld1q_s32(buf32 - 7); in silk_noise_shape_quantizer_short_prediction_neon() 48 int32x4_t a3 = vld1q_s32(buf32 - 3); in silk_noise_shape_quantizer_short_prediction_neon() 50 int32x4_t b0 = vqdmulhq_s32(coef0, a0); in silk_noise_shape_quantizer_short_prediction_neon() 51 int32x4_t b1 = vqdmulhq_s32(coef1, a1); in silk_noise_shape_quantizer_short_prediction_neon() [all …]
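The predictor loads Q-format coefficients and history samples, multiplies them with vqdmulhq_s32, and sums the lanes. A simplified, assumed version of one such step:

    #include <arm_neon.h>
    #include <stdint.h>

    /* vqdmulhq_s32 keeps the high 32 bits of 2*coef*sample per lane (a fixed-point
       multiply); the horizontal add collapses the four partial products. */
    int32_t dot4_q31(const int32_t *coef32, const int32_t *buf32) {
        const int32x4_t coef = vld1q_s32(coef32);
        const int32x4_t samp = vld1q_s32(buf32);
        const int32x4_t prod = vqdmulhq_s32(coef, samp);
    #if defined(__aarch64__)
        return vaddvq_s32(prod);
    #else
        int32x2_t sum2 = vadd_s32(vget_low_s32(prod), vget_high_s32(prod));
        sum2 = vpadd_s32(sum2, sum2);
        return vget_lane_s32(sum2, 0);
    #endif
    }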
|
/external/libvpx/libvpx/vp9/common/arm/neon/ |
D | vp9_iht_neon.h | 24 const int32x4_t c3 = vdupq_n_s32(sinpi_3_9); in iadst4() 26 int32x4_t s[8], output[4]; in iadst4() 63 const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(x[0]), c, 0); in iadst_half_butterfly_neon() 64 const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(x[0]), c, 0); in iadst_half_butterfly_neon() 65 const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(x[1]), c, 0); in iadst_half_butterfly_neon() 66 const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(x[1]), c, 0); in iadst_half_butterfly_neon() 67 int32x4_t t0[2], t1[2]; in iadst_half_butterfly_neon() 81 const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 1); in iadst_half_butterfly_neg_neon() 82 const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(*x0), c, 1); in iadst_half_butterfly_neg_neon() 83 const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 1); in iadst_half_butterfly_neg_neon() [all …]
|
D | vp9_highbd_iht8x8_add_neon.c | 21 static INLINE void highbd_iadst_half_butterfly_neon(int32x4_t *const x, in highbd_iadst_half_butterfly_neon() 23 const int32x4_t sum = vaddq_s32(x[0], x[1]); in highbd_iadst_half_butterfly_neon() 24 const int32x4_t sub = vsubq_s32(x[0], x[1]); in highbd_iadst_half_butterfly_neon() 38 static INLINE void highbd_iadst_butterfly_lane_0_1_neon(const int32x4_t in0, in highbd_iadst_butterfly_lane_0_1_neon() 39 const int32x4_t in1, in highbd_iadst_butterfly_lane_0_1_neon() 54 static INLINE void highbd_iadst_butterfly_lane_1_0_neon(const int32x4_t in0, in highbd_iadst_butterfly_lane_1_0_neon() 55 const int32x4_t in1, in highbd_iadst_butterfly_lane_1_0_neon() 70 static INLINE int32x4_t highbd_add_dct_const_round_shift_low_8( in highbd_add_dct_const_round_shift_low_8() 79 static INLINE int32x4_t highbd_sub_dct_const_round_shift_low_8( in highbd_sub_dct_const_round_shift_low_8() 88 static INLINE void highbd_iadst8(int32x4_t *const io0, int32x4_t *const io1, in highbd_iadst8() [all …]
|
/external/webrtc/webrtc/modules/audio_coding/codecs/isac/fix/source/ |
D | transform_neon.c | 35 int32x4_t factq = vdupq_n_s32(fact); in ComplexMulAndFindMaxNeon() 50 int32x4_t tmp0 = vmull_s16(vget_low_s16(tmpr), vget_low_s16(inre1)); in ComplexMulAndFindMaxNeon() 51 int32x4_t tmp1 = vmull_s16(vget_low_s16(tmpr), vget_low_s16(inre2)); in ComplexMulAndFindMaxNeon() 55 int32x4_t tmp2 = vmull_high_s16(tmpr, inre1); in ComplexMulAndFindMaxNeon() 56 int32x4_t tmp3 = vmull_high_s16(tmpr, inre2); in ComplexMulAndFindMaxNeon() 60 int32x4_t tmp2 = vmull_s16(vget_high_s16(tmpr), vget_high_s16(inre1)); in ComplexMulAndFindMaxNeon() 61 int32x4_t tmp3 = vmull_s16(vget_high_s16(tmpr), vget_high_s16(inre2)); in ComplexMulAndFindMaxNeon() 66 int32x4_t outr_0 = vqdmulhq_s32(tmp0, factq); in ComplexMulAndFindMaxNeon() 67 int32x4_t outr_1 = vqdmulhq_s32(tmp2, factq); in ComplexMulAndFindMaxNeon() 68 int32x4_t outi_0 = vqdmulhq_s32(tmp1, factq); in ComplexMulAndFindMaxNeon() [all …]
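The #if split visible in these lines is a common portability idiom: A64 provides vmull_high_s16, while 32-bit NEON reaches the upper half through vget_high_s16 first. A sketch of that idiom with assumed names:

    #include <arm_neon.h>

    /* Widening multiply of the upper four int16 lanes of a and b. */
    static inline int32x4_t mull_upper_half(int16x8_t a, int16x8_t b) {
    #if defined(__aarch64__)
        return vmull_high_s16(a, b);   /* single A64 instruction */
    #else
        return vmull_s16(vget_high_s16(a), vget_high_s16(b));
    #endif
    }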
|
/external/libhevc/common/arm/ |
D | ihevc_weighted_pred_neon_intr.c | 118 int32x4_t i4_tmp1_t; in ihevc_weighted_pred_uni_neonintr() 119 int32x4_t i4_tmp2_t; in ihevc_weighted_pred_uni_neonintr() 120 int32x4_t sto_res_tmp1; in ihevc_weighted_pred_uni_neonintr() 124 int32x4_t tmp_lvl_shift_t; in ihevc_weighted_pred_uni_neonintr() 126 int32x4_t tmp_shift_t; in ihevc_weighted_pred_uni_neonintr() 251 int32x4_t i4_tmp1_t; in ihevc_weighted_pred_chroma_uni_neonintr() 252 int32x4_t i4_tmp2_t; in ihevc_weighted_pred_chroma_uni_neonintr() 253 int32x4_t sto_res_tmp1; in ihevc_weighted_pred_chroma_uni_neonintr() 257 int32x4_t tmp_lvl_shift_t_u, tmp_lvl_shift_t_v; in ihevc_weighted_pred_chroma_uni_neonintr() 260 int32x4_t tmp_shift_t; in ihevc_weighted_pred_chroma_uni_neonintr() [all …]
|
D | ihevc_resi_trans_neon_32x32.c | 113 int32x4_t sum_val = vdupq_n_s32(0); in ihevc_resi_trans_32x32_neon() 1041 static const int32x4_t g_ai4_ihevc_trans_32_0_8 = { 64, -64, 83, -83 }; in ihevc_resi_trans_32x32_neon() 1042 static const int32x4_t g_ai4_ihevc_trans_32_1_8 = { 64, 64, 36, 36 }; in ihevc_resi_trans_32x32_neon() 1044 static const int32x4_t g_ai4_ihevc_trans_32_4_04 = { 89, 75, 50, 18 }; in ihevc_resi_trans_32x32_neon() 1045 static const int32x4_t g_ai4_ihevc_trans_32_12_04 = { 75, -18, -89, -50 }; in ihevc_resi_trans_32x32_neon() 1046 static const int32x4_t g_ai4_ihevc_trans_32_20_04 = { 50, -89, 18, 75 }; in ihevc_resi_trans_32x32_neon() 1047 static const int32x4_t g_ai4_ihevc_trans_32_28_04 = { 18, -50, 75, -89 }; in ihevc_resi_trans_32x32_neon() 1049 static const int32x4_t g_ai4_ihevc_trans_32_2_03 = { 90, 87, 80, 70 }; in ihevc_resi_trans_32x32_neon() 1050 static const int32x4_t g_ai4_ihevc_trans_32_2_47 = { 57, 43, 25, 9 }; in ihevc_resi_trans_32x32_neon() 1051 static const int32x4_t g_ai4_ihevc_trans_32_6_03 = { 87, 57, 9, -43 }; in ihevc_resi_trans_32x32_neon() [all …]
|
/external/libvpx/libvpx/vpx_dsp/ppc/ |
D | fdct32x32_vsx.c | 23 const int32x4_t ac_e = vec_mule(a, cospi16_v); in single_butterfly() 24 const int32x4_t ac_o = vec_mulo(a, cospi16_v); in single_butterfly() 25 const int32x4_t bc_e = vec_mule(b, cospi16_v); in single_butterfly() 26 const int32x4_t bc_o = vec_mulo(b, cospi16_v); in single_butterfly() 29 const int32x4_t sum_e = vec_add(ac_e, bc_e); in single_butterfly() 30 const int32x4_t sum_o = vec_add(ac_o, bc_o); in single_butterfly() 31 const int32x4_t diff_e = vec_sub(ac_e, bc_e); in single_butterfly() 32 const int32x4_t diff_o = vec_sub(ac_o, bc_o); in single_butterfly() 35 const int32x4_t rsum_o = vec_add(sum_o, vec_dct_const_rounding); in single_butterfly() 36 const int32x4_t rsum_e = vec_add(sum_e, vec_dct_const_rounding); in single_butterfly() [all …]
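Note that in these PowerPC files int32x4_t is libvpx's own typedef for the VSX vector signed int type (declared in its ppc types header), not the ARM NEON type. vec_mule and vec_mulo widen the even- and odd-indexed int16 lanes into 32-bit products. A small assumed sketch using the plain AltiVec types:

    #include <altivec.h>

    /* Even/odd widening multiply: one int16x8 pair yields two int32x4 product vectors. */
    void widen_mul_even_odd(vector signed short a, vector signed short b,
                            vector signed int *even, vector signed int *odd) {
        *even = vec_mule(a, b);   /* products of lanes 0, 2, 4, 6 */
        *odd  = vec_mulo(a, b);   /* products of lanes 1, 3, 5, 7 */
    }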
|
D | deblock_vsx.c | 149 static INLINE int32x4_t slide_sumsq_s32(int32x4_t xsq_even, int32x4_t xsq_odd) { in slide_sumsq_s32() 152 int32x4_t sumsq_1 = vec_add(vec_slo(xsq_even, vec_splats((int8_t)(4 << 3))), in slide_sumsq_s32() 156 int32x4_t sumsq_2 = vec_add(vec_slo(xsq_even, vec_splats((int8_t)(8 << 3))), in slide_sumsq_s32() 160 int32x4_t sumsq_3 = vec_add(vec_slo(xsq_even, vec_splats((int8_t)(12 << 3))), in slide_sumsq_s32() 173 static INLINE bool16x8_t mask_s16(int32x4_t sumsq_even, int32x4_t sumsq_odd, in mask_s16() 174 int16x8_t sum, int32x4_t lim) { in mask_s16() 178 const int32x4_t sumsq_odd_scaled = in mask_s16() 180 const int32x4_t sumsq_even_scaled = in mask_s16() 182 const int32x4_t thres_odd = vec_sub(sumsq_odd_scaled, vec_mulo(sum, sum)); in mask_s16() 183 const int32x4_t thres_even = vec_sub(sumsq_even_scaled, vec_mule(sum, sum)); in mask_s16() [all …]
|
D | bitdepth_conversion_vsx.h | 23 int32x4_t u = vec_vsx_ld(c, s); in load_tran_low() 24 int32x4_t v = vec_vsx_ld(c, s + 4); in load_tran_low() 36 const int32x4_t even = vec_mule(v, one); in store_tran_low() 37 const int32x4_t odd = vec_mulo(v, one); in store_tran_low() 38 const int32x4_t high = vec_mergeh(even, odd); in store_tran_low() 39 const int32x4_t low = vec_mergel(even, odd); in store_tran_low()
|
/external/libaom/libaom/av1/common/arm/ |
D | convolve_neon.h | 24 int32x4_t sum0, sum1; in wiener_convolve8_vert_4x8() 30 const int32x4_t round_bits = vdupq_n_s32(-round1_bits); in wiener_convolve8_vert_4x8() 31 const int32x4_t zero = vdupq_n_s32(0); in wiener_convolve8_vert_4x8() 32 const int32x4_t round_vec = vdupq_n_s32(round_const); in wiener_convolve8_vert_4x8() 73 int32x4_t sum_0, sum_1; in wiener_convolve8_horiz_8x8() 74 int32x4_t s3_0, s3_1; in wiener_convolve8_horiz_8x8() 79 const int32x4_t round_bits = vdupq_n_s32(-round0_bits); in wiener_convolve8_horiz_8x8() 81 const int32x4_t round_vec_0 = vdupq_n_s32(round_const_0); in wiener_convolve8_horiz_8x8() 82 const int32x4_t round_vec_1 = vdupq_n_s32(round_const_1); in wiener_convolve8_horiz_8x8() 123 int32x4_t sum_0, s3_0; in wiener_convolve8_horiz_4x8() [all …]
|
/external/XNNPACK/src/q8-vadd/ |
D | neon.c | 25 const int32x4_t va_multiplier = vld1q_dup_s32(&params->neon.a_multiplier); in xnn_q8_vadd_ukernel__neon() 26 const int32x4_t vb_multiplier = vld1q_dup_s32(&params->neon.b_multiplier); in xnn_q8_vadd_ukernel__neon() 27 const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift); in xnn_q8_vadd_ukernel__neon() 28 const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0))); in xnn_q8_vadd_ukernel__neon() 49 int32x4_t vacc0_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa0)), va_multiplier); in xnn_q8_vadd_ukernel__neon() 50 int32x4_t vacc1_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa1)), va_multiplier); in xnn_q8_vadd_ukernel__neon() 51 int32x4_t vacc2_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa2)), va_multiplier); in xnn_q8_vadd_ukernel__neon() 52 int32x4_t vacc3_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa3)), va_multiplier); in xnn_q8_vadd_ukernel__neon() 53 int32x4_t vacc0_hi = vmulq_s32(vmovl_high_s16(vxa0), va_multiplier); in xnn_q8_vadd_ukernel__neon() 54 int32x4_t vacc1_hi = vmulq_s32(vmovl_high_s16(vxa1), va_multiplier); in xnn_q8_vadd_ukernel__neon() [all …]
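Each quantized input byte is widened to int32 and scaled by its operand's multiplier before the shared rounding shift. A reduced sketch of one vector's worth of that accumulation, with the surrounding widening from uint8 to int16 assumed to have happened already:

    #include <arm_neon.h>

    /* Low-half accumulator: acc = xa * a_multiplier + xb * b_multiplier, per lane. */
    int32x4_t vadd_accumulate_lo(int16x8_t vxa, int16x8_t vxb,
                                 int32x4_t va_multiplier, int32x4_t vb_multiplier) {
        int32x4_t vacc = vmulq_s32(vmovl_s16(vget_low_s16(vxa)), va_multiplier);
        vacc = vmlaq_s32(vacc, vmovl_s16(vget_low_s16(vxb)), vb_multiplier);
        return vacc;
    }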
|
/external/libvpx/libvpx/vpx_dsp/arm/ |
D | highbd_idct8x8_add_neon.c | 72 const int32x4_t cospis0, const int32x4_t cospis1, int32x4_t *const io0, in idct8x8_12_half1d_bd10() 73 int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3, in idct8x8_12_half1d_bd10() 74 int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6, in idct8x8_12_half1d_bd10() 75 int32x4_t *const io7) { in idct8x8_12_half1d_bd10() 76 int32x4_t step1[8], step2[8]; in idct8x8_12_half1d_bd10() 127 const int32x4_t cospis0, const int32x4_t cospis1, int32x4_t *const io0, in idct8x8_12_half1d_bd12() 128 int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3, in idct8x8_12_half1d_bd12() 129 int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6, in idct8x8_12_half1d_bd12() 130 int32x4_t *const io7) { in idct8x8_12_half1d_bd12() 133 int32x4_t step1[8], step2[8]; in idct8x8_12_half1d_bd12() [all …]
|
D | fdct_neon.c | 52 const int32x4_t s_0_p_s_1 = vaddl_s16(s_0, s_1); in vpx_fdct4x4_neon() 53 const int32x4_t s_0_m_s_1 = vsubl_s16(s_0, s_1); in vpx_fdct4x4_neon() 54 const int32x4_t temp1 = vmulq_n_s32(s_0_p_s_1, cospi_16_64); in vpx_fdct4x4_neon() 55 const int32x4_t temp2 = vmulq_n_s32(s_0_m_s_1, cospi_16_64); in vpx_fdct4x4_neon() 63 const int32x4_t s_3_cospi_8_64 = vmull_n_s16(s_3, cospi_8_64); in vpx_fdct4x4_neon() 64 const int32x4_t s_3_cospi_24_64 = vmull_n_s16(s_3, cospi_24_64); in vpx_fdct4x4_neon() 66 const int32x4_t temp3 = vmlal_n_s16(s_3_cospi_8_64, s_2, cospi_24_64); in vpx_fdct4x4_neon() 67 const int32x4_t temp4 = vmlsl_n_s16(s_3_cospi_24_64, s_2, cospi_8_64); in vpx_fdct4x4_neon()
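The 4x4 forward DCT widens int16 sums and differences and scales them by cosine constants using the vector-by-scalar intrinsics. A hedged sketch of the first butterfly; the constant value is stated here for illustration rather than taken from vpx_dsp:

    #include <arm_neon.h>
    #include <stdint.h>

    /* out0 = (s0 + s1) * c and out1 = (s0 - s1) * c, widened to 32 bits; with
       c = 11585 (about cos(pi/4) * 2^14) this matches the first fdct4 stage. */
    void fdct4_butterfly(int16x4_t s0, int16x4_t s1, int32_t c,
                         int32x4_t *out0, int32x4_t *out1) {
        const int32x4_t sum  = vaddl_s16(s0, s1);
        const int32x4_t diff = vsubl_s16(s0, s1);
        *out0 = vmulq_n_s32(sum, c);
        *out1 = vmulq_n_s32(diff, c);
    }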
|
/external/skqp/src/core/ |
D | SkBitmapProcState_matrixProcs.cpp | 223 int32x4_t vdx8 = vdupq_n_s32(dx8); in decal_nofilter_scale_neon() 226 int32x4_t lbase, hbase; in decal_nofilter_scale_neon() 257 int32x4_t vdx8 = vdupq_n_s32(dx8); in decal_filter_scale_neon() 259 int32x4_t wide_fx, wide_fx2; in decal_filter_scale_neon() 268 int32x4_t wide_out; in decal_filter_scale_neon() 269 int32x4_t wide_out2; in decal_filter_scale_neon() 305 static inline int16x8_t clamp8(int32x4_t low, int32x4_t high, unsigned max) { in clamp8() 318 static inline int32x4_t clamp4(int32x4_t f, unsigned max) { in clamp4() 319 int32x4_t res; in clamp4() 331 static inline int32x4_t extract_low_bits_clamp4(int32x4_t fx, unsigned) { in extract_low_bits_clamp4() [all …]
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/ |
D | softmax.h | 89 gemmlowp::FixedPoint<int32x4_t, kAccumulationIntegerBits>; in Softmax() 91 gemmlowp::FixedPoint<int32x4_t, kScaledDiffIntegerBits>; in Softmax() 92 using FixedPoint0Int32x4 = gemmlowp::FixedPoint<int32x4_t, 0>; in Softmax() 104 int32x4_t diff_min_s32 = vdupq_n_s32(diff_min); in Softmax() 111 int32x4_t input_diff_s32_0 = vmovl_s16(vget_low_s16(input_diff_s16)); in Softmax() 112 int32x4_t input_diff_s32_1 = vmovl_s16(vget_high_s16(input_diff_s16)); in Softmax() 113 int32x4_t mask_0 = in Softmax() 115 int32x4_t mask_1 = in Softmax() 138 int32x4_t sum_of_exps_reduced_4 = (sum_of_exps_0 + sum_of_exps_1).raw(); in Softmax() 178 int32x4_t input_diff_s32_0 = vmovl_s16(vget_low_s16(input_diff_s16)); in Softmax() [all …]
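The Softmax kernel wraps int32x4_t in gemmlowp::FixedPoint<> for the exponent arithmetic; the mask_0/mask_1 vectors gate lanes whose input difference falls below diff_min. A plain-NEON sketch of that gating step only (the FixedPoint math itself is omitted):

    #include <arm_neon.h>
    #include <stdint.h>

    /* Zero out lanes with input_diff < diff_min so they contribute nothing when the
       per-lane results are summed later. */
    int32x4_t gate_below_diff_min(int32x4_t value, int32x4_t input_diff, int32_t diff_min) {
        const int32x4_t vdiff_min = vdupq_n_s32(diff_min);
        const int32x4_t mask = vreinterpretq_s32_u32(vcgeq_s32(input_diff, vdiff_min));
        return vandq_s32(value, mask);
    }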
|