/external/XNNPACK/src/f32-gavgpool-cw/ |
D | neon-x4.c | 39 float32x4_t vsum3 = vmovq_n_f32(0.0f); in xnn_f32_gavgpool_cw_ukernel__neon_x4() local 50 vsum3 = vaddq_f32(vsum3, vi3); in xnn_f32_gavgpool_cw_ukernel__neon_x4() 68 vsum3 = vaddq_f32(vsum3, vi3); in xnn_f32_gavgpool_cw_ukernel__neon_x4() 75 const float32x4_t vsum23 = vpaddq_f32(vsum2, vsum3); in xnn_f32_gavgpool_cw_ukernel__neon_x4() 81 vadd_f32(vget_low_f32(vsum3), vget_high_f32(vsum3))); in xnn_f32_gavgpool_cw_ukernel__neon_x4()
|
D | wasmsimd-arm-x4.c | 39 v128_t vsum3 = vsum0; in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4() local 54 vsum3 = wasm_f32x4_add(vsum3, vi3); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4() 71 vsum3 = wasm_f32x4_add(vsum3, vi3); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4() 77 … vsum23 = wasm_f32x4_add(wasm_v32x4_shuffle(vsum2, vsum3, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum2, v… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
|
D | wasmsimd-x86-x4.c | 39 v128_t vsum3 = vsum0; in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4() local 54 vsum3 = wasm_f32x4_add(vsum3, vi3); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4() 71 vsum3 = wasm_f32x4_add(vsum3, vi3); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4() 77 … vsum23 = wasm_f32x4_add(wasm_v32x4_shuffle(vsum2, vsum3, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum2, v… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
|
D | sse-x4.c | 39 __m128 vsum3 = _mm_setzero_ps(); in xnn_f32_gavgpool_cw_ukernel__sse_x4() local 54 vsum3 = _mm_add_ps(vsum3, vi3); in xnn_f32_gavgpool_cw_ukernel__sse_x4() 71 vsum3 = _mm_add_ps(vsum3, vi3); in xnn_f32_gavgpool_cw_ukernel__sse_x4() 77 const __m128 vsum23 = _mm_add_ps(_mm_unpacklo_ps(vsum2, vsum3), _mm_unpackhi_ps(vsum2, vsum3)); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
|
D | scalar-x1.c | 33 float vsum3 = 0.f; in xnn_f32_gavgpool_cw_ukernel__scalar_x1() local 39 vsum3 += i0[3]; in xnn_f32_gavgpool_cw_ukernel__scalar_x1() 50 float vout = ( (vsum0 + vsum1) + (vsum2 + vsum3) ) * vmultiplier; in xnn_f32_gavgpool_cw_ukernel__scalar_x1()
|
/external/libaom/libaom/av1/encoder/x86/ |
D | highbd_temporal_filter_avx2.c | 182 __m256i vsum3 = _mm256_add_epi32(vsum1, vsum2); in highbd_apply_temporal_filter() local 183 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() 200 __m256i vsum3 = _mm256_add_epi32(vsum1, vsum2); in highbd_apply_temporal_filter() local 201 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() 228 __m256i vsum3 = _mm256_add_epi32(vsum1, vsum2); in highbd_apply_temporal_filter() local 229 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() 247 __m256i vsum3 = _mm256_add_epi32(vsum1, vsum2); in highbd_apply_temporal_filter() local 248 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() 276 __m256i vsum3 = _mm256_add_epi32(vsum1, vsum2); in highbd_apply_temporal_filter() local 277 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() [all …]
|
/external/arm-optimized-routines/networking/aarch64/ |
D | chksum_simd.c | 81 uint64x2_t vsum3 = { 0, 0 }; in __chksum_aarch64_simd() local 93 vsum3 = vpadalq_u32(vsum3, vtmp3); in __chksum_aarch64_simd() 100 vsum1 = vpadalq_u32(vsum1, vreinterpretq_u32_u64(vsum3)); in __chksum_aarch64_simd()
|
/external/arm-optimized-routines/networking/arm/ |
D | chksum_simd.c | 55 uint64x2_t vsum3 = { 0, 0 }; in __chksum_arm_simd() local 66 vsum3 = vpadalq_u32(vsum3, vtmp3); in __chksum_arm_simd() 73 vsum1 = vpadalq_u32(vsum1, vreinterpretq_u32_u64(vsum3)); in __chksum_arm_simd()
|
/external/XNNPACK/src/amalgam/ |
D | sse.c | 4191 __m128 vsum3 = _mm_setzero_ps(); in xnn_f32_gavgpool_cw_ukernel__sse_x4() local 4206 vsum3 = _mm_add_ps(vsum3, vi3); in xnn_f32_gavgpool_cw_ukernel__sse_x4() 4223 vsum3 = _mm_add_ps(vsum3, vi3); in xnn_f32_gavgpool_cw_ukernel__sse_x4() 4229 const __m128 vsum23 = _mm_add_ps(_mm_unpacklo_ps(vsum2, vsum3), _mm_unpackhi_ps(vsum2, vsum3)); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
|