Searched refs:sum_vec (Results 1 – 3 of 3) sorted by relevance
/external/ComputeLibrary/src/cpu/kernels/meanstddevnorm/generic/neon/ |
D | qasymm8.cpp | 78 uint32x4_t sum_vec = vdupq_n_u32(0); in neon_qasymm8_meanstddevnorm() local 84 sum_vec = vaddq_u32(sum_vec, vpaddlq_u16(vpaddlq_u8(data))); in neon_qasymm8_meanstddevnorm() 91 sum_vec = vpaddq_u32(sum_vec, sum_vec); in neon_qasymm8_meanstddevnorm() 92 sum_vec = vpaddq_u32(sum_vec, sum_vec); in neon_qasymm8_meanstddevnorm() 93 uint32_t sum = vgetq_lane_u32(sum_vec, 0); in neon_qasymm8_meanstddevnorm() 98 uint32_t sum = vgetq_lane_u32(sum_vec, 0) + in neon_qasymm8_meanstddevnorm() 99 vgetq_lane_u32(sum_vec, 1) + in neon_qasymm8_meanstddevnorm() 100 vgetq_lane_u32(sum_vec, 2) + in neon_qasymm8_meanstddevnorm() 101 vgetq_lane_u32(sum_vec, 3); in neon_qasymm8_meanstddevnorm()
|
D | impl.cpp | 54 auto sum_vec = wrapper::vdup_n(static_cast<ScalarType>(0.f), ExactTagType{}); in mean_stddev_normalization() local 60 sum_vec = wrapper::vadd(sum_vec, data); in mean_stddev_normalization() 64 … auto sum_carry_res = wrapper::vpadd(wrapper::vgethigh(sum_vec), wrapper::vgetlow(sum_vec)); in mean_stddev_normalization() 126 float16x8_t sum_vec = vdupq_n_f16(static_cast<float16_t>(0.0f)); in mean_stddev_normalization() local 132 sum_vec = vaddq_f16(sum_vec, data); in mean_stddev_normalization() 139 float16x4_t sum_carry_res = vpadd_f16(vget_high_f16(sum_vec), vget_low_f16(sum_vec)); in mean_stddev_normalization()
|
/external/libaom/aom_dsp/flow_estimation/x86/ |
D | corner_match_avx2.c | 35 __m256i temp1, sum_vec, sumsq2_vec, cross_vec, v, v1_1, v2_1; in av1_compute_cross_correlation_avx2() local 40 sum_vec = zero; in av1_compute_cross_correlation_avx2() 56 sum_vec = _mm256_add_epi16(sum_vec, _mm256_sad_epu8(v, zero)); in av1_compute_cross_correlation_avx2() 61 __m256i sum_vec1 = _mm256_srli_si256(sum_vec, 8); in av1_compute_cross_correlation_avx2() 62 sum_vec = _mm256_add_epi32(sum_vec, sum_vec1); in av1_compute_cross_correlation_avx2() 63 int sum1_acc = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_vec)); in av1_compute_cross_correlation_avx2() 64 int sum2_acc = _mm256_extract_epi32(sum_vec, 4); in av1_compute_cross_correlation_avx2()
|