/external/libgav1/libgav1/src/dsp/arm/ |
D | warp_neon.cc |
    75 sum = vmlal_s8(sum, filter[0], src_row_window); in HorizontalFilter()
    78 sum = vmlal_s8(sum, filter[1], src_row_window); in HorizontalFilter()
    81 sum = vmlal_s8(sum, filter[2], src_row_window); in HorizontalFilter()
    84 sum = vmlal_s8(sum, filter[3], src_row_window); in HorizontalFilter()
    87 sum = vmlal_s8(sum, filter[4], src_row_window); in HorizontalFilter()
    90 sum = vmlal_s8(sum, filter[5], src_row_window); in HorizontalFilter()
    93 sum = vmlal_s8(sum, filter[6], src_row_window); in HorizontalFilter()
    96 sum = vmlal_s8(sum, filter[7], src_row_window); in HorizontalFilter()
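The warp_neon.cc hits above all apply the same per-tap widening multiply-accumulate. A minimal sketch of that pattern follows; it is illustrative only (the function name, the zero-initialized sum, and the window loading are assumptions, not the libgav1 code):

    #include <arm_neon.h>

    /* Sketch: accumulate eight signed 8-bit filter taps against a sliding
     * 8-pixel window. Requires at least 15 valid bytes at src. */
    static int16x8_t AccumulateTaps(const int8x8_t filter[8], const int8_t* src) {
      int16x8_t sum = vdupq_n_s16(0);
      for (int k = 0; k < 8; ++k) {
        const int8x8_t src_row_window = vld1_s8(src + k);
        /* vmlal_s8: sum[i] += (int16_t)filter[k][i] * (int16_t)src_row_window[i]. */
        sum = vmlal_s8(sum, filter[k], src_row_window);
      }
      return sum;
    }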
|
D | film_grain_neon.cc | 1005 const int16x8_t grain = vmlal_s8(weighted_grain, old_coeff, source_old); in WriteOverlapLine8bpp_NEON()
|
/external/libvpx/libvpx/vp8/common/arm/neon/ |
D | mbloopfilter_neon.c |
    119 q0s16 = vmlal_s8(vreinterpretq_s16_u16(q0u16), vget_low_s8(q1s8), d5); in vp8_mbloop_filter_neon()
    120 q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5); in vp8_mbloop_filter_neon()
    122 q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8), d4); in vp8_mbloop_filter_neon()
    123 q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4); in vp8_mbloop_filter_neon()
    124 q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), vget_low_s8(q1s8), d5); in vp8_mbloop_filter_neon()
    125 q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5); in vp8_mbloop_filter_neon()
|
/external/libaom/libaom/aom_dsp/arm/ |
D | loopfilter_neon.c |
    202 filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3); in lpf_14_neon()
    383 filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3); in lpf_8_neon()
    498 filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3); in lpf_6_neon()
    596 filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3); in lpf_4_neon()
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/ |
D | legacy_optimized_ops.h |
    1490 local_accum00 = vmlal_s8(local_accum00, vget_high_s8(weights00), in GEMVForLstmCellWithSymmetricRange()
    1492 local_accum01 = vmlal_s8(local_accum01, vget_high_s8(weights01), in GEMVForLstmCellWithSymmetricRange()
    1494 local_accum10 = vmlal_s8(local_accum10, vget_high_s8(weights10), in GEMVForLstmCellWithSymmetricRange()
    1496 local_accum11 = vmlal_s8(local_accum11, vget_high_s8(weights11), in GEMVForLstmCellWithSymmetricRange()
    1511 local_accum00 = vmlal_s8(local_accum00, vget_high_s8(weights02), in GEMVForLstmCellWithSymmetricRange()
    1513 local_accum01 = vmlal_s8(local_accum01, vget_high_s8(weights03), in GEMVForLstmCellWithSymmetricRange()
    1515 local_accum10 = vmlal_s8(local_accum10, vget_high_s8(weights12), in GEMVForLstmCellWithSymmetricRange()
    1517 local_accum11 = vmlal_s8(local_accum11, vget_high_s8(weights13), in GEMVForLstmCellWithSymmetricRange()
    1561 local_accum00 = vmlal_s8(local_accum00, vget_high_s8(weights00), in GEMVForLstmCellWithSymmetricRange()
    1563 local_accum01 = vmlal_s8(local_accum01, vget_high_s8(weights01), in GEMVForLstmCellWithSymmetricRange()
    [all …]
|
D | neon_tensor_utils.cc |
    787  vmlal_s8(prod_16x8, vget_high_s8(s1_8x16), vget_high_s8(s2_8x16)); in NeonMatrixBatchVectorMultiplyImpl()
    1113 vmlal_s8(prod_16x8, vget_high_s8(s1_8x16), vget_high_s8(s2_8x16)); in NeonMatrixBatchVectorMultiplyAccumulate()
    1277 vmlal_s8(prod_16x8, vget_high_s8(s1_8x16), vget_high_s8(s2_8x16)); in NeonMatrixBatchVectorMultiplyAccumulate()
    1850 vmlal_s8(prod_16x8, vget_high_s8(s1_8x16), vget_high_s8(s2_8x16)); in NeonSparseMatrixBatchVectorMultiplyAccumulate()
|
D | optimized_ops.h |
    466 vmlal_s8(local_accum0, vget_high_s8(weights0), vget_high_s8(input)); in ShuffledFullyConnectedWorkerImpl()
    468 vmlal_s8(local_accum1, vget_high_s8(weights1), vget_high_s8(input)); in ShuffledFullyConnectedWorkerImpl()
    470 vmlal_s8(local_accum2, vget_high_s8(weights2), vget_high_s8(input)); in ShuffledFullyConnectedWorkerImpl()
    472 vmlal_s8(local_accum3, vget_high_s8(weights3), vget_high_s8(input)); in ShuffledFullyConnectedWorkerImpl()
    548 vmlal_s8(local_accum0, vget_high_s8(weights0), vget_high_s8(input##B)); \ in ShuffledFullyConnectedWorkerImpl()
    550 vmlal_s8(local_accum1, vget_high_s8(weights1), vget_high_s8(input##B)); \ in ShuffledFullyConnectedWorkerImpl()
    552 vmlal_s8(local_accum2, vget_high_s8(weights2), vget_high_s8(input##B)); \ in ShuffledFullyConnectedWorkerImpl()
    554 vmlal_s8(local_accum3, vget_high_s8(weights3), vget_high_s8(input##B)); \ in ShuffledFullyConnectedWorkerImpl()
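The TensorFlow Lite hits above (GEMVForLstmCellWithSymmetricRange, NeonMatrixBatchVectorMultiply*, ShuffledFullyConnectedWorkerImpl) share one int8 dot-product idiom: vmull_s8 on the low halves, vmlal_s8 on the high halves, then a pairwise add into 32-bit accumulators. A sketch of that idiom, with illustrative names that are not from TFLite:

    #include <arm_neon.h>

    /* Multiply 16 int8 weights by 16 int8 inputs and fold the result into
     * four int32 accumulators. Summing two 8x8 products in 16 bits only
     * stays in range when at least one operand avoids -128, hence the
     * "SymmetricRange" qualifier in the function names above. */
    static int32x4_t AccumulateDot16(int32x4_t acc, int8x16_t weights,
                                     int8x16_t input) {
      /* Low eight lanes: widening multiply into 16-bit products. */
      int16x8_t local_accum = vmull_s8(vget_low_s8(weights), vget_low_s8(input));
      /* High eight lanes: widening multiply-accumulate onto the same sums. */
      local_accum = vmlal_s8(local_accum, vget_high_s8(weights),
                             vget_high_s8(input));
      /* Pairwise-add the eight 16-bit sums into the four 32-bit accumulators. */
      return vpadalq_s16(acc, local_accum);
    }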
|
/external/webp/src/dsp/ |
D | dec_neon.c |
    779 const int16x8_t S_lo = vmlal_s8(kCstm1, kCst9, delta_lo); // S = 9 * a - 1 in ApplyFilter6_NEON()
    780 const int16x8_t S_hi = vmlal_s8(kCstm1, kCst9, delta_hi); in ApplyFilter6_NEON()
    781 const int16x8_t Z_lo = vmlal_s8(S_lo, kCst18, delta_lo); // S + 18 * a in ApplyFilter6_NEON()
    782 const int16x8_t Z_hi = vmlal_s8(S_hi, kCst18, delta_hi); in ApplyFilter6_NEON()
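Here vmlal_s8 is used with splatted constant coefficients rather than per-lane weights: the two accumulations build 27 * a - 1 in 16-bit lanes from a signed 8-bit delta. A self-contained sketch of that step, assuming the constants are simple vdup splats (the actual libwebp setup may differ):

    #include <arm_neon.h>

    /* Sketch: compute 27 * delta - 1 per lane via two widening MACs. */
    static int16x8_t WeightedDelta27(int8x8_t delta) {
      const int16x8_t kCstm1 = vdupq_n_s16(-1);
      const int8x8_t kCst9 = vdup_n_s8(9);
      const int8x8_t kCst18 = vdup_n_s8(18);
      const int16x8_t S = vmlal_s8(kCstm1, kCst9, delta);  /* S = 9 * a - 1 */
      const int16x8_t Z = vmlal_s8(S, kCst18, delta);      /* Z = S + 18 * a */
      return Z;
    }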
|
/external/gemmlowp/standalone/ |
D | neon-gemm-kernel-benchmark.cc | 3711 vmlal_s8(local_acc, vget_high_s8(lhs[i]), vget_high_s8(rhs[j])); in Run()
|
/external/clang/test/CodeGen/ |
D | aarch64-neon-intrinsics.c | 8835 return vmlal_s8(a, b, c); in test_vmlal_s8()
|
D | arm_neon_intrinsics.c | 8926 return vmlal_s8(a, b, c); in test_vmlal_s8()
|
/external/neon_2_sse/ |
D | NEON_2_SSE.h |
    579  _NEON2SSESTORAGE int16x8_t vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c); // VMLAL.S8 q0,d0,d0
    3958 _NEON2SSESTORAGE int16x8_t vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c); // VMLAL.S8 q0,d0,d0
    3959 _NEON2SSE_INLINE int16x8_t vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) // VMLAL.S8 q0,d0,d0 in vmlal_s8() function
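The NEON_2_SSE.h declaration above gives the intrinsic's signature: int16x8_t vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c), i.e. per lane a[i] + (int16_t)b[i] * (int16_t)c[i], with the 8-bit products widened to 16 bits before accumulation. A minimal standalone usage example, assuming plain arm_neon.h on an ARM target (NEON_2_SSE.h provides the same signature on x86):

    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      const int16x8_t a = vdupq_n_s16(7);
      const int8x8_t b = vdup_n_s8(100);
      const int8x8_t c = vdup_n_s8(-5);
      /* Each lane: 7 + 100 * (-5) = -493; the product never wraps at 8 bits. */
      const int16x8_t r = vmlal_s8(a, b, c);
      int16_t out[8];
      vst1q_s16(out, r);
      printf("%d\n", out[0]);  /* prints -493 */
      return 0;
    }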
|