/external/XNNPACK/src/q8-vadd/ |
D | neon.c |
    49  int32x4_t vacc0_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa0)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
    50  int32x4_t vacc1_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa1)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
    51  int32x4_t vacc2_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa2)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
    52  int32x4_t vacc3_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa3)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
    58  vacc0_lo = vmlaq_s32(vacc0_lo, vmovl_s16(vget_low_s16(vxb0)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
    59  vacc1_lo = vmlaq_s32(vacc1_lo, vmovl_s16(vget_low_s16(vxb1)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
    60  vacc2_lo = vmlaq_s32(vacc2_lo, vmovl_s16(vget_low_s16(vxb2)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
    61  vacc3_lo = vmlaq_s32(vacc3_lo, vmovl_s16(vget_low_s16(vxb3)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
    115 int32x4_t vacc0_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa0)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
    116 int32x4_t vacc1_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa1)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
    [all …]
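The matches above all use the same widen-multiply-accumulate idiom for the quantized add: each 16-bit half is sign-extended to 32 bits with vmovl_s16, scaled by a per-input multiplier, and accumulated. A minimal sketch of that idiom, assuming only <arm_neon.h>; the helper and parameter names (widen_mul_acc_low, va_mul, vb_mul) are illustrative, not XNNPACK's:

    #include <arm_neon.h>

    /* Sketch only: widen the low halves of two 16-bit vectors, scale input A,
       then multiply-accumulate the scaled input B, as in the matches above. */
    static inline int32x4_t widen_mul_acc_low(int16x8_t vxa, int16x8_t vxb,
                                              int32x4_t va_mul, int32x4_t vb_mul) {
      int32x4_t vacc = vmulq_s32(vmovl_s16(vget_low_s16(vxa)), va_mul);
      vacc = vmlaq_s32(vacc, vmovl_s16(vget_low_s16(vxb)), vb_mul);
      return vacc;
    }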
|
/external/libhevc/encoder/arm/ |
D | ihevce_hme_utils_neon.c |
    169 dst0_4x32b = vmovl_s16(vget_low_s16(src4_8x16b)); in ihevce_get_wt_inp_4x8_neon()
    170 dst1_4x32b = vmovl_s16(vget_low_s16(src5_8x16b)); in ihevce_get_wt_inp_4x8_neon()
    171 dst2_4x32b = vmovl_s16(vget_low_s16(src6_8x16b)); in ihevce_get_wt_inp_4x8_neon()
    172 dst3_4x32b = vmovl_s16(vget_low_s16(src7_8x16b)); in ihevce_get_wt_inp_4x8_neon()
    174 dst4_4x32b = vmovl_s16(vget_high_s16(src4_8x16b)); in ihevce_get_wt_inp_4x8_neon()
    175 dst5_4x32b = vmovl_s16(vget_high_s16(src5_8x16b)); in ihevce_get_wt_inp_4x8_neon()
    176 dst6_4x32b = vmovl_s16(vget_high_s16(src6_8x16b)); in ihevce_get_wt_inp_4x8_neon()
    177 dst7_4x32b = vmovl_s16(vget_high_s16(src7_8x16b)); in ihevce_get_wt_inp_4x8_neon()
    592 dst0_4x32b = vmovl_s16(vget_low_s16(src4_8x16b)); in hme_get_wt_inp_ctb_neon()
    593 dst1_4x32b = vmovl_s16(vget_low_s16(src5_8x16b)); in hme_get_wt_inp_ctb_neon()
    [all …]
|
D | ihevce_common_utils_neon.c |
    129 reg0[0] = vmovl_s16(vget_low_s16(a2)); in ihevce_wt_avg_2d_16x1_neon()
    130 reg0[1] = vmovl_s16(vget_high_s16(a2)); in ihevce_wt_avg_2d_16x1_neon()
    131 reg0[2] = vmovl_s16(vget_low_s16(a3)); in ihevce_wt_avg_2d_16x1_neon()
    132 reg0[3] = vmovl_s16(vget_high_s16(a3)); in ihevce_wt_avg_2d_16x1_neon()
    134 reg1[0] = vmovl_s16(vget_low_s16(a4)); in ihevce_wt_avg_2d_16x1_neon()
    135 reg1[1] = vmovl_s16(vget_high_s16(a4)); in ihevce_wt_avg_2d_16x1_neon()
    136 reg1[2] = vmovl_s16(vget_low_s16(a5)); in ihevce_wt_avg_2d_16x1_neon()
    137 reg1[3] = vmovl_s16(vget_high_s16(a5)); in ihevce_wt_avg_2d_16x1_neon()
    193 a8 = vmovl_s16(vget_low_s16(a0)); in ihevce_wt_avg_2d_8x1_neon()
    194 a9 = vmovl_s16(vget_high_s16(a0)); in ihevce_wt_avg_2d_8x1_neon()
    [all …]
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/ |
D | add.h |
    56  int32x4_t x11 = vmovl_s16(input1_val_low); in AddElementwise()
    57  int32x4_t x12 = vmovl_s16(input1_val_high); in AddElementwise()
    58  int32x4_t x21 = vmovl_s16(input2_val_low); in AddElementwise()
    59  int32x4_t x22 = vmovl_s16(input2_val_high); in AddElementwise()
    146 int32x4_t x11 = vmovl_s16(input1_val_low); in AddScalarBroadcast()
    147 int32x4_t x12 = vmovl_s16(input1_val_high); in AddScalarBroadcast()
    163 int32x4_t x21 = vmovl_s16(input2_val_low); in AddScalarBroadcast()
    164 int32x4_t x22 = vmovl_s16(input2_val_high); in AddScalarBroadcast()
|
D | softmax.h |
    111 int32x4_t input_diff_s32_0 = vmovl_s16(vget_low_s16(input_diff_s16)); in Softmax()
    112 int32x4_t input_diff_s32_1 = vmovl_s16(vget_high_s16(input_diff_s16)); in Softmax()
    178 int32x4_t input_diff_s32_0 = vmovl_s16(vget_low_s16(input_diff_s16)); in Softmax()
    179 int32x4_t input_diff_s32_1 = vmovl_s16(vget_high_s16(input_diff_s16)); in Softmax()
|
D | mean.h |
    77 vmovl_s16(vget_low_s16(input_data_low_shift)); in MeanImpl()
    79 vmovl_s16(vget_high_s16(input_data_low_shift)); in MeanImpl()
    81 vmovl_s16(vget_low_s16(input_data_high_shift)); in MeanImpl()
    83 vmovl_s16(vget_high_s16(input_data_high_shift)); in MeanImpl()
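Here the widened halves feed 32-bit lanes for a mean/sum reduction. A self-contained sketch of an accumulation step of this shape (illustrative names, not TFLite's exact code):

    #include <arm_neon.h>

    /* Sketch only: widen both halves of a 16-bit vector and add them into
       32-bit accumulators, the way a reduction over int16 data would. */
    static inline void accumulate_s16_into_s32(int16x8_t v, int32x4_t *acc_lo,
                                               int32x4_t *acc_hi) {
      *acc_lo = vaddq_s32(*acc_lo, vmovl_s16(vget_low_s16(v)));
      *acc_hi = vaddq_s32(*acc_hi, vmovl_s16(vget_high_s16(v)));
    }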
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/ |
D | neon_tensor_utils.cc |
    847  const int32x4_t first_half = vmovl_s16(vget_low_s16(output_val)); in NeonMatrixBatchVectorAccumulateImpl()
    848  const int32x4_t second_half = vmovl_s16(vget_high_s16(output_val)); in NeonMatrixBatchVectorAccumulateImpl()
    905  const int32x4_t output_val_1 = vmovl_s16(vget_low_s16(first_half)); in NeonMatrixBatchVectorAccumulateImpl()
    906  const int32x4_t output_val_2 = vmovl_s16(vget_high_s16(first_half)); in NeonMatrixBatchVectorAccumulateImpl()
    907  const int32x4_t output_val_3 = vmovl_s16(vget_low_s16(second_half)); in NeonMatrixBatchVectorAccumulateImpl()
    908  const int32x4_t output_val_4 = vmovl_s16(vget_high_s16(second_half)); in NeonMatrixBatchVectorAccumulateImpl()
    1359 const int32x4_t val_s32_0 = vmovl_s16(vget_low_s16(val_s16)); in NeonApplyLayerNorm()
    1360 const int32x4_t val_s32_1 = vmovl_s16(vget_high_s16(val_s16)); in NeonApplyLayerNorm()
    1401 vshlq_n_s32(vmovl_s16(vget_low_s16(val_s16_0)), 10), mean_dup); in NeonApplyLayerNorm()
    1403 vshlq_n_s32(vmovl_s16(vget_high_s16(val_s16_0)), 10), mean_dup); in NeonApplyLayerNorm()
    [all …]
|
D | optimized_ops.h |
    973  vmovl_s16(vget_low_s16(input_data_low_shift)); in MeanImpl()
    975  vmovl_s16(vget_high_s16(input_data_low_shift)); in MeanImpl()
    977  vmovl_s16(vget_low_s16(input_data_high_shift)); in MeanImpl()
    979  vmovl_s16(vget_high_s16(input_data_high_shift)); in MeanImpl()
    1756 int32x4_t x11 = vmovl_s16(input1_val_low); in AddElementwise()
    1757 int32x4_t x12 = vmovl_s16(input1_val_high); in AddElementwise()
    1758 int32x4_t x21 = vmovl_s16(input2_val_low); in AddElementwise()
    1759 int32x4_t x22 = vmovl_s16(input2_val_high); in AddElementwise()
    1847 int32x4_t x11 = vmovl_s16(input1_val_low); in AddScalarBroadcast()
    1848 int32x4_t x12 = vmovl_s16(input1_val_high); in AddScalarBroadcast()
    [all …]
|
D | legacy_optimized_ops.h |
    4062 int32x4_t input_diff_s32_0 = vmovl_s16(vget_low_s16(input_diff_s16)); in Softmax()
    4063 int32x4_t input_diff_s32_1 = vmovl_s16(vget_high_s16(input_diff_s16)); in Softmax()
    4130 int32x4_t input_diff_s32_0 = vmovl_s16(vget_low_s16(input_diff_s16)); in Softmax()
    4131 int32x4_t input_diff_s32_1 = vmovl_s16(vget_high_s16(input_diff_s16)); in Softmax()
    4301 vshlq_s32(vmovl_s16(vget_low_s16(input_val_centered_0)), in Logistic()
    4304 vshlq_s32(vmovl_s16(vget_high_s16(input_val_centered_0)), in Logistic()
    4307 vshlq_s32(vmovl_s16(vget_low_s16(input_val_centered_1)), in Logistic()
    4310 vshlq_s32(vmovl_s16(vget_high_s16(input_val_centered_1)), in Logistic()
    4496 vshlq_s32(vmovl_s16(vget_low_s16(input_val_centered_0)), in Tanh()
    4499 vshlq_s32(vmovl_s16(vget_high_s16(input_val_centered_0)), in Tanh()
    [all …]
|
/external/libopus/silk/arm/ |
D | NSQ_neon.c |
    86 int32x4_t coef0 = vmovl_s16(vget_low_s16(coef16)); in silk_NSQ_noise_shape_feedback_loop_neon()
    87 int32x4_t coef1 = vmovl_s16(vget_high_s16(coef16)); in silk_NSQ_noise_shape_feedback_loop_neon()
|
D | NSQ_del_dec_neon_intr.c |
    759 tmp1_s32x4 = vbslq_s32( t_u32x4, vmovl_s16( q1_Q10_s16x4 ), vmovl_s16( q2_Q10_s16x4 ) ); in silk_noise_shape_quantizer_del_dec_neon()
    760 tmp2_s32x4 = vbslq_s32( t_u32x4, vmovl_s16( q2_Q10_s16x4 ), vmovl_s16( q1_Q10_s16x4 ) ); in silk_noise_shape_quantizer_del_dec_neon()
|
/external/libaom/libaom/av1/common/arm/ |
D | convolve_neon.h |
    89  sum_0 = vmovl_s16(vget_low_s16(sum)); in wiener_convolve8_horiz_8x8()
    90  sum_1 = vmovl_s16(vget_high_s16(sum)); in wiener_convolve8_horiz_8x8()
    140 sum_0 = vmovl_s16(sum); in wiener_convolve8_horiz_4x8()
|
D | selfguided_neon.c |
    436  q12345 = vaddq_s32(vmovl_s16(s1), q2345); in boxsum2()
    437  q23456 = vaddq_s32(q2345, vmovl_s16(s6)); in boxsum2()
    438  q34567 = vaddq_s32(q4567, vmovl_s16(s3)); in boxsum2()
    439  q45678 = vaddq_s32(q4567, vmovl_s16(s8)); in boxsum2()
    1073 a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0); in final_filter_fast_internal()
    1074 a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1); in final_filter_fast_internal()
    1099 a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0); in final_filter_fast_internal()
    1100 a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1); in final_filter_fast_internal()
    1151 a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0); in final_filter_internal()
    1152 a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1); in final_filter_internal()
|
D | mem_neon.h |
    533 const int32x4_t v0 = vmovl_s16(vget_low_s16(a)); in store_s16q_to_tran_low()
    534 const int32x4_t v1 = vmovl_s16(vget_high_s16(a)); in store_s16q_to_tran_low()
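store_s16q_to_tran_low widens both halves of a 16-bit vector before storing, because tran_low_t can be 32 bits wide; the identical helper shows up again in libvpx below. A sketch of the pattern, assuming tran_low_t == int32_t for illustration (helper name store_s16q_as_s32 is not the library's):

    #include <arm_neon.h>
    #include <stdint.h>

    /* Sketch only: widen each half of 'a' and store eight 32-bit lanes. */
    static inline void store_s16q_as_s32(int32_t *buf, int16x8_t a) {
      const int32x4_t v0 = vmovl_s16(vget_low_s16(a));
      const int32x4_t v1 = vmovl_s16(vget_high_s16(a));
      vst1q_s32(buf, v0);
      vst1q_s32(buf + 4, v1);
    }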
|
D | jnt_convolve_neon.c |
    44  dst0 = vsubq_s32(vreinterpretq_s32_u32(sum0), vmovl_s16(sub_const_vec)); in compute_avg_4x1()
    78  const int32x4_t sub_const_vec = vmovl_s16(sub_const); in compute_avg_8x1()
    133 const int32x4_t const_vec = vmovl_s16(sub_const_vec); in compute_avg_4x4()
    214 const int32x4_t sub_const_vec = vmovl_s16(sub_const); in compute_avg_8x4()
|
/external/libvpx/libvpx/vpx_dsp/arm/ |
D | mem_neon.h |
    78 const int32x4_t v0 = vmovl_s16(vget_low_s16(a)); in store_s16q_to_tran_low()
    79 const int32x4_t v1 = vmovl_s16(vget_high_s16(a)); in store_s16q_to_tran_low()
|
D | fdct32x32_neon.c |
    547 temp##_lo[temp_index] = vmovl_s16(vget_low_s16(a[a_index])); \
    548 temp##_hi[temp_index] = vmovl_s16(vget_high_s16(a[a_index])); \
|
/external/webrtc/webrtc/modules/audio_coding/codecs/isac/fix/source/ |
D | transform_neon.c |
    396 int32x4_t tmpr_0 = vmovl_s16(vget_low_s16(tmpr)); in PostShiftAndDivideAndDemodulateNeon()
    397 int32x4_t tmpi_0 = vmovl_s16(vget_low_s16(tmpi)); in PostShiftAndDivideAndDemodulateNeon()
    402 int32x4_t tmpr_1 = vmovl_s16(vget_high_s16(tmpr)); in PostShiftAndDivideAndDemodulateNeon()
    403 int32x4_t tmpi_1 = vmovl_s16(vget_high_s16(tmpi)); in PostShiftAndDivideAndDemodulateNeon()
|
/external/libaom/libaom/aom_dsp/simd/ |
D | v128_intrinsics_arm.h |
    616 return vreinterpretq_s64_s32(vmovl_s16(vreinterpret_s16_s64(a))); in v128_unpack_s16_s32()
    626 vmovl_s16(vreinterpret_s16_s64(vget_low_s64(a)))); in v128_unpacklo_s16_s32()
    636 vmovl_s16(vreinterpret_s16_s64(vget_high_s64(a)))); in v128_unpackhi_s16_s32()
|
D | v64_intrinsics_arm.h |
    506 return vreinterpret_s64_s32(vget_low_s32(vmovl_s16(vreinterpret_s16_s64(x)))); in v64_unpacklo_s16_s32()
    515 vget_high_s32(vmovl_s16(vreinterpret_s16_s64(x)))); in v64_unpackhi_s16_s32()
|
/external/tensorflow/tensorflow/core/kernels/ |
D | quantization_utils.h |
    554 const int32x4_t input_value_low_32x4 = vmovl_s16(input_value_low_16x4); in Requantize8x8To32Neon()
    555 const int32x4_t input_value_high_32x4 = vmovl_s16(input_value_high_16x4); in Requantize8x8To32Neon()
|
/external/libhevc/common/arm/ |
D | ihevc_resi_trans_neon.c | 281 int32x4_t c3_4x32b = vmulq_s32(vmovl_s16(src2_4x16b), coeff2_4x32b); /* 74*r2 */ in ihevc_resi_trans_4x4_ttype1_neon()
|
/external/clang/test/CodeGen/ |
D | aarch64-neon-intrinsics.c | 7035 return vmovl_s16(a); in test_vmovl_s16()
|
D | arm_neon_intrinsics.c | 9833 return vmovl_s16(a); in test_vmovl_s16()
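The two codegen tests above exercise the intrinsic in isolation. As a reference for its semantics, here is a plain-C scalar model (no NEON required): vmovl_s16 sign-extends each of the four 16-bit input lanes to 32 bits.

    #include <stdint.h>

    /* Scalar reference for vmovl_s16: out[i] = (int32_t)in[i] for i = 0..3. */
    static void movl_s16_ref(const int16_t in[4], int32_t out[4]) {
      for (int i = 0; i < 4; ++i) out[i] = (int32_t)in[i];
    }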
|
/external/neon_2_sse/ |
D | NEON_2_SSE.h |
    1801  _NEON2SSESTORAGE int32x4_t vmovl_s16(int16x4_t a); // VMOVL.S16 q0,d0
    12933 _NEON2SSESTORAGE int32x4_t vmovl_s16(int16x4_t a); // VMOVL.S16 q0,d0
    12934 _NEON2SSE_INLINE int32x4_t vmovl_s16(int16x4_t a) in vmovl_s16() function
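NEON_2_SSE.h declares the NEON prototype above and supplies an x86 implementation behind it. As an illustrative sketch (not the library's actual body), the same widening can be expressed with SSE4.1's _mm_cvtepi16_epi32, which sign-extends the four low 16-bit lanes of a register:

    #include <smmintrin.h>  /* SSE4.1 */

    /* Sketch only: emulate vmovl_s16 by sign-extending lanes 0..3 of 'a'
       from 16 to 32 bits. */
    static inline __m128i movl_s16_sse(__m128i a) {
      return _mm_cvtepi16_epi32(a);
    }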
|