
Searched refs:vsum (Results 1 – 25 of 107) sorted by relevance


/external/libvpx/vpx_dsp/x86/
variance_sse2.c
26 __m128i vsum = _mm_setzero_si128(); in vpx_get_mb_ss_sse2() local
31 vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v)); in vpx_get_mb_ss_sse2()
35 return add32x4_sse2(vsum); in vpx_get_mb_ss_sse2()
56 static INLINE void variance_final_128_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_128_pel_sse2() argument
61 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); in variance_final_128_pel_sse2()
62 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); in variance_final_128_pel_sse2()
63 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); in variance_final_128_pel_sse2()
64 *sum = (int16_t)_mm_extract_epi16(vsum, 0); in variance_final_128_pel_sse2()
68 static INLINE void variance_final_256_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_256_pel_sse2() argument
73 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); in variance_final_256_pel_sse2()
[all …]
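
The shift-and-add reduction in variance_final_128_pel_sse2 above is a generic horizontal sum over eight int16 lanes: each _mm_srli_si128 halves the distance between live lanes. A minimal standalone sketch of that pattern (function and variable names are illustrative, not from libvpx):

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>
    #include <stdio.h>

    /* Fold eight int16 lanes of v into one scalar sum. Only valid while
       the true sum fits in 16 bits, which is why the libvpx variant is
       reserved for blocks of at most 128 pixels. */
    static int16_t hsum_epi16_sse2(__m128i v) {
      v = _mm_add_epi16(v, _mm_srli_si128(v, 8)); /* lanes 4..7 into 0..3 */
      v = _mm_add_epi16(v, _mm_srli_si128(v, 4)); /* lanes 2..3 into 0..1 */
      v = _mm_add_epi16(v, _mm_srli_si128(v, 2)); /* lane 1 into lane 0 */
      return (int16_t)_mm_extract_epi16(v, 0);
    }

    int main(void) {
      __m128i v = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
      printf("%d\n", hsum_epi16_sse2(v)); /* 36 */
      return 0;
    }
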
variance_avx2.c
62 __m128i vsum, in variance_final_from_32bit_sum_avx2() argument
70 const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, vsum); in variance_final_from_32bit_sum_avx2()
71 const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, vsum); in variance_final_from_32bit_sum_avx2()
81 __m256i vsum, in variance_final_from_16bit_sum_avx2() argument
85 const __m128i sum_reg_128 = _mm_add_epi16(_mm256_castsi256_si128(vsum), in variance_final_from_16bit_sum_avx2()
86 _mm256_extractf128_si256(vsum, 1)); in variance_final_from_16bit_sum_avx2()
160 __m256i *const vsum) { in variance8_avx2() argument
162 *vsum = _mm256_setzero_si256(); in variance8_avx2()
166 variance8_kernel_avx2(src, src_stride, ref, ref_stride, vsse, vsum); in variance8_avx2()
175 __m256i *const vsum) { in variance16_avx2() argument
[all …]
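
variance_final_from_16bit_sum_avx2 starts the same reduction one level up: the two 128-bit halves of the 256-bit accumulator are added together first, then the SSE2 folds finish the job. A hedged sketch of that half-and-fold step (names are illustrative):

    #include <immintrin.h>  /* AVX2; compile with -mavx2 */
    #include <stdint.h>
    #include <stdio.h>

    /* Add the high and low 128-bit halves of a 256-bit register of int16
       lanes, then finish with the SSE2 shift-and-add folds. */
    static int16_t hsum_epi16_avx2(__m256i v) {
      __m128i s = _mm_add_epi16(_mm256_castsi256_si128(v),
                                _mm256_extractf128_si256(v, 1));
      s = _mm_add_epi16(s, _mm_srli_si128(s, 8));
      s = _mm_add_epi16(s, _mm_srli_si128(s, 4));
      s = _mm_add_epi16(s, _mm_srli_si128(s, 2));
      return (int16_t)_mm_extract_epi16(s, 0);
    }

    int main(void) {
      __m256i v = _mm256_set1_epi16(1);
      printf("%d\n", hsum_epi16_avx2(v)); /* 16 lanes of 1 -> 16 */
      return 0;
    }
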
/external/libaom/aom_dsp/x86/
variance_sse2.c
25 __m128i vsum = _mm_setzero_si128(); in aom_get_mb_ss_sse2() local
30 vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v)); in aom_get_mb_ss_sse2()
34 vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); in aom_get_mb_ss_sse2()
35 vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); in aom_get_mb_ss_sse2()
36 return (unsigned int)_mm_cvtsi128_si32(vsum); in aom_get_mb_ss_sse2()
82 static inline void variance_final_128_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_128_pel_sse2() argument
87 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); in variance_final_128_pel_sse2()
88 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); in variance_final_128_pel_sse2()
89 vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); in variance_final_128_pel_sse2()
90 *sum = (int16_t)_mm_extract_epi16(vsum, 0); in variance_final_128_pel_sse2()
[all …]
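
aom_get_mb_ss_sse2 (lines 25–36 above) shows the classic sum-of-squares idiom: _mm_madd_epi16 multiplies each int16 lane by itself and adds adjacent products, so the loop accumulates four int32 partial sums that are folded at the end. A self-contained sketch of the same idiom (illustrative names, not the libaom function):

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>
    #include <stdio.h>

    /* Sum of squares of n int16 values, n a multiple of 8. */
    static unsigned int sum_squares_sse2(const int16_t *src, int n) {
      __m128i vsum = _mm_setzero_si128();
      for (int i = 0; i < n; i += 8) {
        const __m128i v = _mm_loadu_si128((const __m128i *)(src + i));
        /* madd squares each lane and adds adjacent pairs: 4 int32 sums */
        vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v));
      }
      vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
      vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
      return (unsigned int)_mm_cvtsi128_si32(vsum);
    }

    int main(void) {
      int16_t buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      printf("%u\n", sum_squares_sse2(buf, 8)); /* 204 */
      return 0;
    }
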
variance_avx2.c
49 static inline int variance_final_from_32bit_sum_avx2(__m256i vsse, __m128i vsum, in variance_final_from_32bit_sum_avx2() argument
55 const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, vsum); in variance_final_from_32bit_sum_avx2()
56 const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, vsum); in variance_final_from_32bit_sum_avx2()
66 static inline int variance_final_512_avx2(__m256i vsse, __m256i vsum, in variance_final_512_avx2() argument
69 const __m128i vsum_128 = mm256_add_hi_lo_epi16(vsum); in variance_final_512_avx2()
76 static inline int variance_final_1024_avx2(__m256i vsse, __m256i vsum, in variance_final_1024_avx2() argument
79 const __m128i vsum_128 = mm256_add_hi_lo_epi16(vsum); in variance_final_1024_avx2()
94 static inline int variance_final_2048_avx2(__m256i vsse, __m256i vsum, in variance_final_2048_avx2() argument
96 vsum = sum_to_32bit_avx2(vsum); in variance_final_2048_avx2()
97 const __m128i vsum_128 = mm256_add_hi_lo_epi32(vsum); in variance_final_2048_avx2()
[all …]
/external/XNNPACK/src/f32-gavgpool-cw/
wasmsimd-x86-x4.c
78 …const v128_t vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum01, vsum23, 0, 2, 4, 6), wasm_v32x4_shuf… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4() local
79 v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
94 v128_t vsum = wasm_f32x4_const_splat(0.0f); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4() local
99 vsum = wasm_f32x4_add(vsum, vi0); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
106 vsum = wasm_f32x4_add(vsum, vi0); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
109 vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum, vsum, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum, vsum, 1… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
110 vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum, vsum, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum, vsum, 1… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
112 v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
wasmsimd-arm-x4.c
78 …const v128_t vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum01, vsum23, 0, 2, 4, 6), wasm_v32x4_shuf… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4() local
79 v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
94 v128_t vsum = wasm_f32x4_const_splat(0.0f); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4() local
99 vsum = wasm_f32x4_add(vsum, vi0); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
106 vsum = wasm_f32x4_add(vsum, vi0); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
109 vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum, vsum, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum, vsum, 1… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
110 vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum, vsum, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum, vsum, 1… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
112 v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
sse-x4.c
78 const __m128 vsum = _mm_add_ps(_mm_movelh_ps(vsum01, vsum23), _mm_movehl_ps(vsum23, vsum01)); in xnn_f32_gavgpool_cw_ukernel__sse_x4() local
79 __m128 vout = _mm_mul_ps(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
94 __m128 vsum = _mm_setzero_ps(); in xnn_f32_gavgpool_cw_ukernel__sse_x4() local
99 vsum = _mm_add_ps(vsum, vi0); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
106 vsum = _mm_add_ps(vsum, vi0); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
109 vsum = _mm_add_ps(vsum, _mm_movehl_ps(vsum, vsum)); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
110 vsum = _mm_add_ss(vsum, _mm_shuffle_ps(vsum, vsum, _MM_SHUFFLE(3, 2, 1, 1))); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
112 __m128 vout = _mm_mul_ss(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
neon-x4.c
76 const float32x4_t vsum = vpaddq_f32(vsum01, vsum23); in xnn_f32_gavgpool_cw_ukernel__neon_x4() local
82 const float32x4_t vsum = vcombine_f32(vpadd_f32(vget_low_f32(vsum01), vget_high_f32(vsum01)), in xnn_f32_gavgpool_cw_ukernel__neon_x4() local
86 float32x4_t vout = vmulq_f32(vsum, vmultiplier); in xnn_f32_gavgpool_cw_ukernel__neon_x4()
114 float32x2_t vsum = vadd_f32(vget_low_f32(vsum0), vget_high_f32(vsum0)); in xnn_f32_gavgpool_cw_ukernel__neon_x4() local
115 vsum = vpadd_f32(vsum, vsum); in xnn_f32_gavgpool_cw_ukernel__neon_x4()
117 float32x2_t vout = vmul_f32(vsum, vget_low_f32(vmultiplier)); in xnn_f32_gavgpool_cw_ukernel__neon_x4()
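
The tail of the SSE variant (sse-x4.c lines 109–110 above) is the standard four-float horizontal sum: fold the high pair onto the low pair with movehl, then fold the remaining pair with a shuffle plus add_ss. A standalone sketch with illustrative names:

    #include <xmmintrin.h>  /* SSE */
    #include <stdio.h>

    /* Horizontal sum of four floats, as in the scalar tail of
       xnn_f32_gavgpool_cw_ukernel__sse_x4. */
    static float hsum_ps_sse(__m128 v) {
      v = _mm_add_ps(v, _mm_movehl_ps(v, v)); /* [a+c, b+d, 2c, 2d] */
      v = _mm_add_ss(v, _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 2, 1, 1)));
      return _mm_cvtss_f32(v);                /* lane 0 = a+b+c+d */
    }

    int main(void) {
      __m128 v = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
      printf("%f\n", hsum_ps_sse(v)); /* 10.0 */
      return 0;
    }
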
/external/cronet/tot/third_party/llvm-libc/src/AOR_v20.02/networking/arm/
chksum_simd.c
22 uint64x1_t vsum = { 0 }; in __chksum_arm_simd() local
45 vsum = vpaddl_u32(vtmp); in __chksum_arm_simd()
98 vsum = vpadal_u32(vsum, vtmp); in __chksum_arm_simd()
106 vsum = vpadal_u32(vsum, vtmp); in __chksum_arm_simd()
126 vsum = vpadal_u32(vsum, vreinterpret_u32_u64(vword64)); in __chksum_arm_simd()
130 vsum = vpaddl_u32(vreinterpret_u32_u64(vsum)); in __chksum_arm_simd()
131 vsum = vpaddl_u32(vreinterpret_u32_u64(vsum)); in __chksum_arm_simd()
132 Assert(vget_lane_u32(vreinterpret_u32_u64(vsum), 1) == 0); in __chksum_arm_simd()
135 uint32x2_t vsum32 = vreinterpret_u32_u64(vsum); in __chksum_arm_simd()
/external/cronet/stable/third_party/llvm-libc/src/AOR_v20.02/networking/arm/
chksum_simd.c
22 uint64x1_t vsum = { 0 }; in __chksum_arm_simd() local
45 vsum = vpaddl_u32(vtmp); in __chksum_arm_simd()
98 vsum = vpadal_u32(vsum, vtmp); in __chksum_arm_simd()
106 vsum = vpadal_u32(vsum, vtmp); in __chksum_arm_simd()
126 vsum = vpadal_u32(vsum, vreinterpret_u32_u64(vword64)); in __chksum_arm_simd()
130 vsum = vpaddl_u32(vreinterpret_u32_u64(vsum)); in __chksum_arm_simd()
131 vsum = vpaddl_u32(vreinterpret_u32_u64(vsum)); in __chksum_arm_simd()
132 Assert(vget_lane_u32(vreinterpret_u32_u64(vsum), 1) == 0); in __chksum_arm_simd()
135 uint32x2_t vsum32 = vreinterpret_u32_u64(vsum); in __chksum_arm_simd()
/external/arm-optimized-routines/networking/arm/
chksum_simd.c
21 uint64x1_t vsum = { 0 }; in __chksum_arm_simd() local
44 vsum = vpaddl_u32(vtmp); in __chksum_arm_simd()
97 vsum = vpadal_u32(vsum, vtmp); in __chksum_arm_simd()
105 vsum = vpadal_u32(vsum, vtmp); in __chksum_arm_simd()
125 vsum = vpadal_u32(vsum, vreinterpret_u32_u64(vword64)); in __chksum_arm_simd()
129 vsum = vpaddl_u32(vreinterpret_u32_u64(vsum)); in __chksum_arm_simd()
130 vsum = vpaddl_u32(vreinterpret_u32_u64(vsum)); in __chksum_arm_simd()
131 Assert(vget_lane_u32(vreinterpret_u32_u64(vsum), 1) == 0); in __chksum_arm_simd()
134 uint32x2_t vsum32 = vreinterpret_u32_u64(vsum); in __chksum_arm_simd()
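
All three chksum_simd.c copies accumulate with pairwise add-and-widen: vpadal_u32 adds a pair of u32 lanes into one u64 lane, so carries cannot be lost mid-loop, and the final vpaddl_u32 calls fold the upper 32 bits of the sum back into the lower 32 bits. An illustrative reduction loop (simplified; the real __chksum_arm_simd also handles alignment and odd tails):

    #include <arm_neon.h>
    #include <stdio.h>

    static uint64_t accumulate_u32(const uint32_t *p, int pairs) {
      uint64x1_t vsum = vdup_n_u64(0);
      for (int i = 0; i < pairs; i++) {
        const uint32x2_t vtmp = vld1_u32(p + 2 * i);
        vsum = vpadal_u32(vsum, vtmp); /* add pair into u64 accumulator */
      }
      /* Fold the high 32 bits of the 64-bit sum into the low 32 bits,
         twice, as the original does for checksum carry folding. */
      vsum = vpaddl_u32(vreinterpret_u32_u64(vsum));
      vsum = vpaddl_u32(vreinterpret_u32_u64(vsum));
      return vget_lane_u64(vsum, 0);
    }

    int main(void) {
      uint32_t data[4] = {1, 2, 3, 4};
      printf("%llu\n", (unsigned long long)accumulate_u32(data, 2)); /* 10 */
      return 0;
    }
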
/external/libaom/av1/encoder/x86/
highbd_temporal_filter_avx2.c
125 static AOM_FORCE_INLINE int32_t xx_mask_and_hadd(__m256i vsum, int i) { in xx_mask_and_hadd() argument
127 __m256i vtmp = _mm256_and_si256(vsum, *(__m256i *)sse_bytemask[i]); in xx_mask_and_hadd()
185 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() local
194 acc_5x5_sse[row][0] = xx_mask_and_hadd(vsum, 0); in highbd_apply_temporal_filter()
195 acc_5x5_sse[row][1] = xx_mask_and_hadd(vsum, 1); in highbd_apply_temporal_filter()
196 acc_5x5_sse[row][2] = xx_mask_and_hadd(vsum, 2); in highbd_apply_temporal_filter()
197 acc_5x5_sse[row][3] = xx_mask_and_hadd(vsum, 3); in highbd_apply_temporal_filter()
203 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() local
209 acc_5x5_sse[row][0] = xx_mask_and_hadd(vsum, 0); in highbd_apply_temporal_filter()
210 acc_5x5_sse[row][1] = xx_mask_and_hadd(vsum, 1); in highbd_apply_temporal_filter()
[all …]
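
xx_mask_and_hadd ANDs the running sum with a per-column mask before the horizontal add, which is how one eight-lane register yields several overlapping window sums. A sketch of the mask-then-reduce step with an illustrative mask (the sse_bytemask table from libaom is not reproduced here):

    #include <immintrin.h>  /* AVX2; compile with -mavx2 */
    #include <stdint.h>
    #include <stdio.h>

    static int32_t mask_and_hsum_avx2(__m256i v, __m256i mask) {
      const __m256i m = _mm256_and_si256(v, mask); /* keep selected lanes */
      __m128i s = _mm_add_epi32(_mm256_castsi256_si128(m),
                                _mm256_extracti128_si256(m, 1));
      s = _mm_add_epi32(s, _mm_srli_si128(s, 8));
      s = _mm_add_epi32(s, _mm_srli_si128(s, 4));
      return _mm_cvtsi128_si32(s);
    }

    int main(void) {
      __m256i v = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
      /* keep only the first five lanes, like one 5-wide window */
      __m256i mask = _mm256_setr_epi32(-1, -1, -1, -1, -1, 0, 0, 0);
      printf("%d\n", mask_and_hsum_avx2(v, mask)); /* 15 */
      return 0;
    }
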
/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/u8lut32norm/
scalar.c
21 uint32_t vsum = 0; in compute_sum() local
24 vsum += t[vx]; in compute_sum()
26 return vsum; in compute_sum()
36 const uint32_t vsum = compute_sum(n, x, t); in pytorch_u8lut32norm_ukernel__scalar() local
37 assert(vsum != 0); in pytorch_u8lut32norm_ukernel__scalar()
39 struct fxdiv_divisor_uint32_t vsum_divisor = fxdiv_init_uint32_t(vsum); in pytorch_u8lut32norm_ukernel__scalar()
40 const uint32_t vrounding = (vsum >> 1); in pytorch_u8lut32norm_ukernel__scalar()
/external/XNNPACK/src/u8-lut32norm/
scalar.c
23 uint32_t vsum = 0; in compute_sum() local
26 vsum += t[vx]; in compute_sum()
28 return vsum; in compute_sum()
39 const uint32_t vsum = compute_sum(n, x, t); in xnn_u8_lut32norm_ukernel__scalar() local
40 assert(vsum != 0); in xnn_u8_lut32norm_ukernel__scalar()
42 struct fxdiv_divisor_uint32_t vsum_divisor = fxdiv_init_uint32_t(vsum); in xnn_u8_lut32norm_ukernel__scalar()
43 const uint32_t vrounding = (vsum >> 1); in xnn_u8_lut32norm_ukernel__scalar()
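
Both lut32norm kernels share the same two-phase shape: compute_sum accumulates the 32-bit table entries for every input byte, then each entry is divided by that sum with round-to-nearest (vrounding = vsum >> 1), using fxdiv to turn the division into a multiplication. A plain-division sketch of the idea; the 0..255 output scale here is an assumption for illustration, not the kernels' exact fixed-point scaling:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Phase 1: sum the table entries selected by the input bytes. */
    static uint32_t compute_sum(size_t n, const uint8_t *x, const uint32_t *t) {
      uint32_t vsum = 0;
      for (size_t i = 0; i < n; i++) vsum += t[x[i]];
      return vsum;
    }

    int main(void) {
      uint32_t t[256];
      for (int i = 0; i < 256; i++) t[i] = (uint32_t)i;
      const uint8_t x[4] = {10, 20, 30, 40};
      const uint32_t vsum = compute_sum(4, x, t); /* 100 */
      const uint32_t vrounding = vsum >> 1;       /* round-to-nearest bias */
      /* Phase 2: normalize each looked-up value against the sum. */
      for (int i = 0; i < 4; i++) {
        const uint32_t y = (t[x[i]] * 255u + vrounding) / vsum;
        printf("%u ", y); /* 26 51 76 102 */
      }
      printf("\n");
      return 0;
    }
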
/external/XNNPACK/src/qs8-gavgpool/
multipass-neon.c.in
80 ${XINT16X8_T} vsum${ABC[C:C+8]} = ${VADDL_X8}(vi0x${ABC[C:C+8]}, vi1x${ABC[C:C+8]});
86 vsum${ABC[C:C+8]} = ${VADDW_X8}(vsum${ABC[C:C+8]}, vi${M}x${ABC[C:C+8]});
90 const int32x4_t vacc${ABC[C:C+4]} = vaddw_s16(vinit_bias, vget_low_s16(vsum${ABC[C:C+8]}));
91 … const int32x4_t vacc${ABC[C+4:C+8]} = vaddw_s16(vinit_bias, vget_high_s16(vsum${ABC[C:C+8]}));
93 …einterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vinit_bias), vget_low_u16(vsum${ABC[C:C+8]})));
94 …interpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vinit_bias), vget_high_u16(vsum${ABC[C:C+8]})));
104 ${XINT16X8_T} vsum${ABC[0:8]} = ${VADDL_X8}(vi0x${ABC[0:8]}, vi1x${ABC[0:8]});
109 vsum${ABC[0:8]} = ${VADDW_X8}(vsum${ABC[0:8]}, vi${M}x${ABC[0:8]});
112 const int32x4_t vacc${ABC[0:4]} = vaddw_s16(vinit_bias, vget_low_s16(vsum${ABC[0:8]}));
113 const int32x4_t vacc${ABC[4:8]} = vaddw_s16(vinit_bias, vget_high_s16(vsum${ABC[0:8]}));
[all …]
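
For the signed-int8 instantiation, the ${VADDL_X8}/${VADDW_X8} placeholders above expand to vaddl_s8/vaddw_s8: rows are widened to 16 bits as they are added, and widened again to 32 bits only when the bias comes in. A small sketch of that expansion (three rows, illustrative data):

    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      const int8_t row0[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      const int8_t row1[8] = {1, 1, 1, 1, 1, 1, 1, 1};
      const int8_t row2[8] = {2, 2, 2, 2, 2, 2, 2, 2};
      const int8x8_t vi0 = vld1_s8(row0);
      const int8x8_t vi1 = vld1_s8(row1);
      const int8x8_t vi2 = vld1_s8(row2);

      int16x8_t vsum = vaddl_s8(vi0, vi1); /* add + widen s8 -> s16 */
      vsum = vaddw_s8(vsum, vi2);          /* accumulate another row */

      const int32x4_t vinit_bias = vdupq_n_s32(100);
      const int32x4_t vacc_lo = vaddw_s16(vinit_bias, vget_low_s16(vsum));
      const int32x4_t vacc_hi = vaddw_s16(vinit_bias, vget_high_s16(vsum));

      printf("%d %d\n", vgetq_lane_s32(vacc_lo, 0), vgetq_lane_s32(vacc_hi, 3));
      /* 1+1+2+100 = 104 and 8+1+2+100 = 111 */
      return 0;
    }
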
/external/XNNPACK/src/f16-gavgpool-cw/
neonfp16arith-x8.c
79 const float16x4_t vsum = vpadd_f16(vget_low_f16(vsum0123), vget_high_f16(vsum0123)); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8() local
87 const float16x4_t vsum = vpadd_f16(vsum01_lo, vsum23_lo); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8() local
90 float16x4_t vout = vmul_f16(vsum, vmultiplier); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8()
122 float16x4_t vsum = vadd_f16(vget_low_f16(vsum0), vget_high_f16(vsum0)); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8() local
123 vsum = vpadd_f16(vsum, vsum); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8()
124 vsum = vpadd_f16(vsum, vsum); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8()
126 float16x4_t vout = vmul_f16(vsum, vmultiplier); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8()
/external/libmpeg2/common/x86/
icv_variance_ssse3.c
94 __m128i vsum, vsum_sqr; in icv_variance_8x4_ssse3() local
128 vsum = _mm_add_epi64(sum_r0, sum_r1); in icv_variance_8x4_ssse3()
129 vsum = _mm_add_epi64(vsum, _mm_srli_si128(vsum, 8)); in icv_variance_8x4_ssse3()
131 sum = _mm_cvtsi128_si32(vsum); in icv_variance_8x4_ssse3()
/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8avgpool/
mp8x9p8q-neon.c
88 const uint16x8_t vsum = vaddq_u16(vsum2345, vsum01678); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
91 vaddw_s16(vbias, vreinterpret_s16_u16(vget_low_u16(vsum))); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
93 vaddw_s16(vbias, vreinterpret_s16_u16(vget_high_u16(vsum))); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
141 const uint16x8_t vsum = vaddq_u16(vsum2345, vsum01678); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
144 vaddw_s16(vbias, vreinterpret_s16_u16(vget_low_u16(vsum))); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
146 vaddw_s16(vbias, vreinterpret_s16_u16(vget_high_u16(vsum))); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
194 const uint16x8_t vsum = vaddq_u16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
196 vacc_lo = vaddw_s16(vacc_lo, vreinterpret_s16_u16(vget_low_u16(vsum))); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
197 vacc_hi = vaddw_s16(vacc_hi, vreinterpret_s16_u16(vget_high_u16(vsum))); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
244 const uint16x8_t vsum = vaddq_u16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
[all …]
mp8x9p8q-sse2.c
87 const __m128i vsum = _mm_add_epi16(vsum2345, vsum01678); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
90 _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero)); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2()
92 _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero)); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2()
149 const __m128i vsum = _mm_add_epi16(vsum2345, vsum01678); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
152 _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero)); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2()
154 _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero)); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2()
210 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
212 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vsum, vzero)); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2()
213 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vsum, vzero)); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2()
268 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
[all …]
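
The SSE2 kernel widens its unsigned 16-bit sums by interleaving with zero, since SSE2 has no widening add: each half of vsum becomes four zero-extended 32-bit lanes that can be added to the bias. A minimal sketch of that idiom:

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const __m128i vzero = _mm_setzero_si128();
      const __m128i vbias = _mm_set1_epi32(10);
      const __m128i vsum = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
      /* unpack with zero = zero-extend u16 lanes to u32 lanes */
      const __m128i vacc_lo =
          _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero));
      const __m128i vacc_hi =
          _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero));
      printf("%d %d\n", _mm_cvtsi128_si32(vacc_lo),
             _mm_cvtsi128_si32(vacc_hi)); /* 11 15 */
      return 0;
    }
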
/external/XNNPACK/src/f32-gavgpool/
7p7x-minmax-scalar-c1.c
52 const float vsum = vsum016 + vsum2345; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local
54 *b++ = vsum; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1()
86 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local
88 *b++ = vsum; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1()
140 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local
142 float vout = vsum * vscale; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1()
7p7x-minmax-wasm-c1.c
52 const float vsum = vsum016 + vsum2345; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local
54 *b++ = vsum; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1()
86 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local
88 *b++ = vsum; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1()
140 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local
142 float vout = vsum * vscale; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1()
7p7x-minmax-wasmsimd-arm-c4.c
61 const v128_t vsum = wasm_f32x4_add(vsum016, vsum2345); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
63 wasm_v128_store(b, vsum); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
102 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
104 wasm_v128_store(b, vsum); b += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
164 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
166 v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
193 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
195 v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
7p7x-minmax-wasmsimd-x86-c4.c
61 const v128_t vsum = wasm_f32x4_add(vsum016, vsum2345); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
63 wasm_v128_store(b, vsum); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
102 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
104 wasm_v128_store(b, vsum); b += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
164 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
166 v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
193 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
195 v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
7p7x-minmax-sse-c4.c
61 const __m128 vsum = _mm_add_ps(vsum016, vsum2345); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
63 _mm_store_ps(b, vsum); b += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
101 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
103 _mm_store_ps(b, vsum); b += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
163 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
165 __m128 vout = _mm_mul_ps(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
192 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
194 __m128 vout = _mm_mul_ps(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
7p7x-minmax-neon-c4.c
54 const float32x4_t vsum = vaddq_f32(vsum016, vsum2345); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
56 vst1q_f32(b, vsum); b += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
87 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
89 vst1q_f32(b, vsum); b += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
141 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
143 float32x4_t vout = vmulq_f32(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
169 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
171 float32x4_t vout = vmulq_f32(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
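
The 7p7x kernels in this directory all follow the same multipass scheme: a first pass sums seven rows into a scratch buffer, middle passes add seven more rows each, and the final pass adds the remainder and multiplies by a precomputed scale before clamping. A scalar skeleton of that control flow (one middle pass and the min/max clamp omitted; all names illustrative):

    #include <stdio.h>

    int main(void) {
      enum { ROWS = 14, CHANNELS = 4 };
      float input[ROWS][CHANNELS];
      for (int r = 0; r < ROWS; r++)
        for (int c = 0; c < CHANNELS; c++) input[r][c] = (float)(r + c);

      float buffer[CHANNELS];
      /* first pass: seven rows into the scratch buffer */
      for (int c = 0; c < CHANNELS; c++) {
        float vsum = 0.0f;
        for (int r = 0; r < 7; r++) vsum += input[r][c];
        buffer[c] = vsum;
      }
      /* final pass: remaining rows plus the buffered sum, then scale */
      const float vscale = 1.0f / (float)ROWS;
      for (int c = 0; c < CHANNELS; c++) {
        float vsum = buffer[c];
        for (int r = 7; r < ROWS; r++) vsum += input[r][c];
        const float vout = vsum * vscale;
        printf("%f ", vout); /* column means */
      }
      printf("\n");
      return 0;
    }
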