Home
last modified time | relevance | path

Searched refs:vsum (Results 1 – 25 of 94) sorted by relevance

1234

/external/libvpx/libvpx/vpx_dsp/x86/
Dvariance_sse2.c26 __m128i vsum = _mm_setzero_si128(); in vpx_get_mb_ss_sse2() local
31 vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v)); in vpx_get_mb_ss_sse2()
35 return add32x4_sse2(vsum); in vpx_get_mb_ss_sse2()
56 static INLINE void variance_final_128_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_128_pel_sse2() argument
61 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); in variance_final_128_pel_sse2()
62 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); in variance_final_128_pel_sse2()
63 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); in variance_final_128_pel_sse2()
64 *sum = (int16_t)_mm_extract_epi16(vsum, 0); in variance_final_128_pel_sse2()
68 static INLINE void variance_final_256_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_256_pel_sse2() argument
73 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); in variance_final_256_pel_sse2()
[all …]
Dvariance_avx2.c62 __m128i vsum, in variance_final_from_32bit_sum_avx2() argument
70 const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, vsum); in variance_final_from_32bit_sum_avx2()
71 const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, vsum); in variance_final_from_32bit_sum_avx2()
81 __m256i vsum, in variance_final_from_16bit_sum_avx2() argument
85 const __m128i sum_reg_128 = _mm_add_epi16(_mm256_castsi256_si128(vsum), in variance_final_from_16bit_sum_avx2()
86 _mm256_extractf128_si256(vsum, 1)); in variance_final_from_16bit_sum_avx2()
125 __m256i *const vsum) { in variance16_avx2() argument
127 *vsum = _mm256_setzero_si256(); in variance16_avx2()
131 variance16_kernel_avx2(src, src_stride, ref, ref_stride, vsse, vsum); in variance16_avx2()
140 __m256i *const vsum) { in variance32_avx2() argument
[all …]
/external/XNNPACK/src/f32-gavgpool-cw/
Dwasmsimd-arm-x4.c78 …const v128_t vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum01, vsum23, 0, 2, 4, 6), wasm_v32x4_shuf… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4() local
79 v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
94 v128_t vsum = wasm_f64x2_splat(0.0); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4() local
99 vsum = wasm_f32x4_add(vsum, vi0); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
106 vsum = wasm_f32x4_add(vsum, vi0); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
109vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum, vsum, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum, vsum, 1… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
110vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum, vsum, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum, vsum, 1… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
112 v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
Dwasmsimd-x86-x4.c78 …const v128_t vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum01, vsum23, 0, 2, 4, 6), wasm_v32x4_shuf… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4() local
79 v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
96 v128_t vsum = wasm_f64x2_splat(0.0); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4() local
101 vsum = wasm_f32x4_add(vsum, vi0); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
108 vsum = wasm_f32x4_add(vsum, vi0); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
111vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum, vsum, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum, vsum, 1… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
112vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum, vsum, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum, vsum, 1… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
114 v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
Dsse-x4.c78 const __m128 vsum = _mm_add_ps(_mm_movelh_ps(vsum01, vsum23), _mm_movehl_ps(vsum23, vsum01)); in xnn_f32_gavgpool_cw_ukernel__sse_x4() local
79 __m128 vout = _mm_mul_ps(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
94 __m128 vsum = _mm_setzero_ps(); in xnn_f32_gavgpool_cw_ukernel__sse_x4() local
99 vsum = _mm_add_ps(vsum, vi0); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
106 vsum = _mm_add_ps(vsum, vi0); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
109 vsum = _mm_add_ps(vsum, _mm_movehl_ps(vsum, vsum)); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
110 vsum = _mm_add_ss(vsum, _mm_shuffle_ps(vsum, vsum, _MM_SHUFFLE(3, 2, 1, 1))); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
112 __m128 vout = _mm_mul_ss(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
Dneon-x4.c76 const float32x4_t vsum = vpaddq_f32(vsum01, vsum23); in xnn_f32_gavgpool_cw_ukernel__neon_x4() local
82 const float32x4_t vsum = vcombine_f32(vpadd_f32(vget_low_f32(vsum01), vget_high_f32(vsum01)), in xnn_f32_gavgpool_cw_ukernel__neon_x4() local
86 float32x4_t vout = vmulq_f32(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__neon_x4()
114 float32x2_t vsum = vadd_f32(vget_low_f32(vsum0), vget_high_f32(vsum0)); in xnn_f32_gavgpool_cw_ukernel__neon_x4() local
115 vsum = vpadd_f32(vsum, vsum); in xnn_f32_gavgpool_cw_ukernel__neon_x4()
117 float32x2_t vout = vmul_f32(vsum, vget_low_f32(vmultiplier)); in xnn_f32_gavgpool_cw_ukernel__neon_x4()
/external/llvm-project/llvm/test/MC/VE/
DVSUMX.s6 # CHECK-INST: vsum.l %v11, %v12
8 vsum.l %v11, %v12
10 # CHECK-INST: vsum.l %v11, %vix, %vm11
12 vsum.l %v11, %vix, %vm11
14 # CHECK-INST: vsum.l %vix, %v22, %vm15
16 vsum.l %vix, %v22, %vm15
18 # CHECK-INST: vsum.l %v63, %v60, %vm2
20 vsum.l %v63, %v60, %vm2
22 # CHECK-INST: vsum.l %vix, %vix
24 vsum.l %vix, %vix, %vm0
[all …]
DVSUMS.s6 # CHECK-INST: vsum.w.sx %v11, %v12
8 vsum.w.sx %v11, %v12
10 # CHECK-INST: vsum.w.sx %v11, %vix, %vm11
12 vsum.w.sx %v11, %vix, %vm11
14 # CHECK-INST: vsum.w.sx %vix, %v22, %vm15
16 vsum.w.sx %vix, %v22, %vm15
18 # CHECK-INST: vsum.w.zx %v63, %v60, %vm2
20 vsum.w.zx %v63, %v60, %vm2
22 # CHECK-INST: vsum.w.zx %vix, %vix
24 vsum.w.zx %vix, %vix, %vm0
[all …]
/external/arm-optimized-routines/networking/arm/
Dchksum_simd.c21 uint64x1_t vsum = { 0 }; in __chksum_arm_simd() local
44 vsum = vpaddl_u32(vtmp); in __chksum_arm_simd()
97 vsum = vpadal_u32(vsum, vtmp); in __chksum_arm_simd()
105 vsum = vpadal_u32(vsum, vtmp); in __chksum_arm_simd()
125 vsum = vpadal_u32(vsum, vreinterpret_u32_u64(vword64)); in __chksum_arm_simd()
129 vsum = vpaddl_u32(vreinterpret_u32_u64(vsum)); in __chksum_arm_simd()
130 vsum = vpaddl_u32(vreinterpret_u32_u64(vsum)); in __chksum_arm_simd()
131 Assert(vget_lane_u32(vreinterpret_u32_u64(vsum), 1) == 0); in __chksum_arm_simd()
134 uint32x2_t vsum32 = vreinterpret_u32_u64(vsum); in __chksum_arm_simd()
/external/llvm-project/libc/AOR_v20.02/networking/arm/
Dchksum_simd.c22 uint64x1_t vsum = { 0 }; in __chksum_arm_simd() local
45 vsum = vpaddl_u32(vtmp); in __chksum_arm_simd()
98 vsum = vpadal_u32(vsum, vtmp); in __chksum_arm_simd()
106 vsum = vpadal_u32(vsum, vtmp); in __chksum_arm_simd()
126 vsum = vpadal_u32(vsum, vreinterpret_u32_u64(vword64)); in __chksum_arm_simd()
130 vsum = vpaddl_u32(vreinterpret_u32_u64(vsum)); in __chksum_arm_simd()
131 vsum = vpaddl_u32(vreinterpret_u32_u64(vsum)); in __chksum_arm_simd()
132 Assert(vget_lane_u32(vreinterpret_u32_u64(vsum), 1) == 0); in __chksum_arm_simd()
135 uint32x2_t vsum32 = vreinterpret_u32_u64(vsum); in __chksum_arm_simd()
/external/libaom/libaom/aom_dsp/x86/
Dvariance_sse2.c30 __m128i vsum = _mm_setzero_si128(); in aom_get_mb_ss_sse2() local
35 vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v)); in aom_get_mb_ss_sse2()
39 vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); in aom_get_mb_ss_sse2()
40 vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); in aom_get_mb_ss_sse2()
41 return _mm_cvtsi128_si32(vsum); in aom_get_mb_ss_sse2()
80 static INLINE void variance_final_128_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_128_pel_sse2() argument
85 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); in variance_final_128_pel_sse2()
86 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); in variance_final_128_pel_sse2()
87 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); in variance_final_128_pel_sse2()
88 *sum = (int16_t)_mm_extract_epi16(vsum, 0); in variance_final_128_pel_sse2()
[all …]
Dvariance_avx2.c48 static INLINE int variance_final_from_32bit_sum_avx2(__m256i vsse, __m128i vsum, in variance_final_from_32bit_sum_avx2() argument
54 const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, vsum); in variance_final_from_32bit_sum_avx2()
55 const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, vsum); in variance_final_from_32bit_sum_avx2()
65 static INLINE int variance_final_512_avx2(__m256i vsse, __m256i vsum, in variance_final_512_avx2() argument
68 const __m128i vsum_128 = mm256_add_hi_lo_epi16(vsum); in variance_final_512_avx2()
75 static INLINE int variance_final_1024_avx2(__m256i vsse, __m256i vsum, in variance_final_1024_avx2() argument
78 const __m128i vsum_128 = mm256_add_hi_lo_epi16(vsum); in variance_final_1024_avx2()
93 static INLINE int variance_final_2048_avx2(__m256i vsse, __m256i vsum, in variance_final_2048_avx2() argument
95 vsum = sum_to_32bit_avx2(vsum); in variance_final_2048_avx2()
96 const __m128i vsum_128 = mm256_add_hi_lo_epi32(vsum); in variance_final_2048_avx2()
[all …]
/external/webrtc/modules/video_processing/util/
Ddenoiser_filter_sse2.cc26 __m128i vsum = _mm_setzero_si128(); in Get8x8varSse2() local
42 vsum = _mm_add_epi16(vsum, diff0); in Get8x8varSse2()
43 vsum = _mm_add_epi16(vsum, diff1); in Get8x8varSse2()
49 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); in Get8x8varSse2()
50 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); in Get8x8varSse2()
51 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); in Get8x8varSse2()
52 *sum = static_cast<int16_t>(_mm_extract_epi16(vsum, 0)); in Get8x8varSse2()
/external/XNNPACK/src/u8-lut32norm/
Dscalar.c23 uint32_t vsum = 0; in compute_sum() local
26 vsum += t[vx]; in compute_sum()
28 return vsum; in compute_sum()
39 const uint32_t vsum = compute_sum(n, x, t); in xnn_u8_lut32norm_ukernel__scalar() local
40 assert(vsum != 0); in xnn_u8_lut32norm_ukernel__scalar()
42 struct fxdiv_divisor_uint32_t vsum_divisor = fxdiv_init_uint32_t(vsum); in xnn_u8_lut32norm_ukernel__scalar()
43 const uint32_t vrounding = (vsum >> 1); in xnn_u8_lut32norm_ukernel__scalar()
/external/python/pybind11/tests/
Dtest_stl_binders.py213 vsum = 0
216 vsum += v.value
218 assert vsum == 150
225 vsum = 0
228 vsum += v.value
230 assert vsum == 150
249 vsum = 0
253 vsum += v_i.value
255 assert vsum == 7500
263 vsum = 0
[all …]
/external/libmpeg2/common/x86/
Dicv_variance_ssse3.c94 __m128i vsum, vsum_sqr; in icv_variance_8x4_ssse3() local
128 vsum = _mm_add_epi64(sum_r0, sum_r1); in icv_variance_8x4_ssse3()
129 vsum = _mm_add_epi64(vsum, _mm_srli_si128(vsum, 8)); in icv_variance_8x4_ssse3()
131 sum = _mm_cvtsi128_si32(vsum); in icv_variance_8x4_ssse3()
/external/XNNPACK/src/f32-gavgpool/
D7p7x-minmax-scalar-c1.c52 const float vsum = vsum016 + vsum2345; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local
54 *b++ = vsum; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1()
86 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local
88 *b++ = vsum; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1()
140 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local
142 float vout = vsum * vscale; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1()
D7p7x-minmax-wasm-c1.c52 const float vsum = vsum016 + vsum2345; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local
54 *b++ = vsum; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1()
86 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local
88 *b++ = vsum; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1()
140 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local
142 float vout = vsum * vscale; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1()
D7p7x-minmax-neon-c4.c54 const float32x4_t vsum = vaddq_f32(vsum016, vsum2345); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
56 vst1q_f32(b, vsum); b += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
87 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
89 vst1q_f32(b, vsum); b += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
141 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
143 float32x4_t vout = vmulq_f32(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
169 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
171 float32x4_t vout = vmulq_f32(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
D7p7x-minmax-wasmsimd-arm-c4.c61 const v128_t vsum = wasm_f32x4_add(vsum016, vsum2345); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
63 wasm_v128_store(b, vsum); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
102 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
104 wasm_v128_store(b, vsum); b += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
164 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
166 v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
193 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
195 v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
D7p7x-minmax-sse-c4.c61 const __m128 vsum = _mm_add_ps(vsum016, vsum2345); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
63 _mm_store_ps(b, vsum); b += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
101 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
103 _mm_store_ps(b, vsum); b += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
163 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
165 __m128 vout = _mm_mul_ps(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
192 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
194 __m128 vout = _mm_mul_ps(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
D7p7x-minmax-wasmsimd-x86-c4.c61 const v128_t vsum = wasm_f32x4_add(vsum016, vsum2345); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
63 wasm_v128_store(b, vsum); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
102 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
104 wasm_v128_store(b, vsum); b += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
164 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
166 v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
193 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
195 v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
/external/XNNPACK/src/f16-gavgpool/
D7p7x-minmax-neonfp16arith-c8.c55 const float16x8_t vsum = vaddq_f16(vsum016, vsum2345); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local
57 vst1q_f16(b, vsum); b += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
88 const float16x8_t vsum = vaddq_f16(vsum0123, vsum456a); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local
90 vst1q_f16(b, vsum); b += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
142 const float16x8_t vsum = vaddq_f16(vsum0123, vsum456a); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local
144 float16x8_t vout = vmulq_f16(vsum, vscale); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
170 const float16x8_t vsum = vaddq_f16(vsum0123, vsum456a); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local
172 float16x8_t vout = vmulq_f16(vsum, vscale); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
/external/XNNPACK/src/qu8-gavgpool/
D7p7x-minmax-neon-c8.c59 const int16x8_t vsum = vreinterpretq_s16_u16(vaddq_u16(vsum016, vsum2345)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8() local
61 const int32x4_t vacc_lo = vaddw_s16(vbias, vget_low_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
62 const int32x4_t vacc_hi = vaddw_s16(vbias, vget_high_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
96 const int16x8_t vsum = vreinterpretq_s16_u16(vaddq_u16(vsum016, vsum2345)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8() local
98 vst1q_s32(acc, vaddw_s16(vacc_lo, vget_low_s16(vsum))); acc += 4; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
99 vst1q_s32(acc, vaddw_s16(vacc_hi, vget_high_s16(vsum))); acc += 4; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
158 const int16x8_t vsum = vreinterpretq_s16_u16(vaddq_u16(vsum016, vsum2345)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8() local
159 vacc_lo = vaddw_s16(vacc_lo, vget_low_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
160 vacc_hi = vaddw_s16(vacc_hi, vget_high_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
230 const int16x8_t vsum = vreinterpretq_s16_u16(vaddq_u16(vsum016, vsum2345)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8() local
[all …]
D7p7x-minmax-scalar-c1.c56 const uint32_t vsum = vsum016 + vsum2345; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__scalar_c1() local
57 const int32_t vacc = vbias + (int32_t) vsum; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__scalar_c1()
90 const uint32_t vsum = vsum016 + vsum2345; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__scalar_c1() local
92 *b++ += (int32_t) vsum; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__scalar_c1()
150 const uint32_t vsum = vsum016 + vsum2345; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__scalar_c1() local
151 vacc += (int32_t) vsum; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__scalar_c1()

1234