/external/llvm/test/CodeGen/X86/ |
D | peephole-multiple-folds.ll | 14 %vsum1 = phi <8 x float> [ %vsum1.next, %loopbody ], [ zeroinitializer, %entry ] 18 …%vsum1.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %m1, <8 x float> zeroi… 20 %vsum1.next.1 = extractelement <8 x float> %vsum1.next, i32 0 21 %c = fcmp oeq float %vsum1.next.1, 0.0 25 %r = fadd <8 x float> %vsum1.next, %vsum2.next
|
/external/llvm-project/llvm/test/CodeGen/X86/ |
D | peephole-multiple-folds.ll | 14 %vsum1 = phi <8 x float> [ %vsum1.next, %loopbody ], [ zeroinitializer, %entry ] 18 …%vsum1.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %m1, <8 x float> zeroi… 20 %vsum1.next.1 = extractelement <8 x float> %vsum1.next, i32 0 21 %c = fcmp oeq float %vsum1.next.1, 0.0 25 %r = fadd <8 x float> %vsum1.next, %vsum2.next
|
/external/arm-optimized-routines/networking/aarch64/ |
D | chksum_simd.c | 79 uint64x2_t vsum1 = { 0, 0 }; in __chksum_aarch64_simd() local 91 vsum1 = vpadalq_u32(vsum1, vtmp1); in __chksum_aarch64_simd() 100 vsum1 = vpadalq_u32(vsum1, vreinterpretq_u32_u64(vsum3)); in __chksum_aarch64_simd() 108 vsum1 = vpadalq_u32(vsum1, vtmp1); in __chksum_aarch64_simd() 115 vsum0 = vpadalq_u32(vsum0, vreinterpretq_u32_u64(vsum1)); in __chksum_aarch64_simd()
|
/external/llvm-project/libc/AOR_v20.02/networking/aarch64/ |
D | chksum_simd.c | 80 uint64x2_t vsum1 = { 0, 0 }; in __chksum_aarch64_simd() local 92 vsum1 = vpadalq_u32(vsum1, vtmp1); in __chksum_aarch64_simd() 101 vsum1 = vpadalq_u32(vsum1, vreinterpretq_u32_u64(vsum3)); in __chksum_aarch64_simd() 109 vsum1 = vpadalq_u32(vsum1, vtmp1); in __chksum_aarch64_simd() 116 vsum0 = vpadalq_u32(vsum0, vreinterpretq_u32_u64(vsum1)); in __chksum_aarch64_simd()
|
/external/rust/crates/libz-sys/src/zlib-ng/arch/power/ |
D | adler32_power8.c | 81 vector unsigned int vsum1, vsum2; in adler32_power8() local 94 vsum1 = vec_sum4s(vbuf, v_zeros); /* sum(i=1 to 16) buf[i]. */ in adler32_power8() 100 vs1 = vec_add(vsum1, vs1); in adler32_power8() 106 vs1 = vec_sumsu(vs1, vsum1); in adler32_power8() 129 vsum1 = vec_sum4s(vbuf, v_zeros); /* sum(i=1 to 16) buf[i]. */ in adler32_power8() 135 vs1 = vec_add(vsum1, vs1); in adler32_power8() 141 vs1 = vec_sumsu(vs1, vsum1); in adler32_power8()
|
/external/XNNPACK/src/f32-gavgpool-cw/ |
D | neon-x4.c | 37 float32x4_t vsum1 = vmovq_n_f32(0.0f); in xnn_f32_gavgpool_cw_ukernel__neon_x4() local 48 vsum1 = vaddq_f32(vsum1, vi1); in xnn_f32_gavgpool_cw_ukernel__neon_x4() 66 vsum1 = vaddq_f32(vsum1, vi1); in xnn_f32_gavgpool_cw_ukernel__neon_x4() 74 const float32x4_t vsum01 = vpaddq_f32(vsum0, vsum1); in xnn_f32_gavgpool_cw_ukernel__neon_x4() 79 vadd_f32(vget_low_f32(vsum1), vget_high_f32(vsum1))); in xnn_f32_gavgpool_cw_ukernel__neon_x4()
|
D | wasmsimd-arm-x4.c | 37 v128_t vsum1 = vsum0; in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4() local 52 vsum1 = wasm_f32x4_add(vsum1, vi1); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4() 69 vsum1 = wasm_f32x4_add(vsum1, vi1); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4() 76 … vsum01 = wasm_f32x4_add(wasm_v32x4_shuffle(vsum0, vsum1, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum0, v… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4()
|
D | sse-x4.c | 37 __m128 vsum1 = _mm_setzero_ps(); in xnn_f32_gavgpool_cw_ukernel__sse_x4() local 52 vsum1 = _mm_add_ps(vsum1, vi1); in xnn_f32_gavgpool_cw_ukernel__sse_x4() 69 vsum1 = _mm_add_ps(vsum1, vi1); in xnn_f32_gavgpool_cw_ukernel__sse_x4() 76 const __m128 vsum01 = _mm_add_ps(_mm_unpacklo_ps(vsum0, vsum1), _mm_unpackhi_ps(vsum0, vsum1)); in xnn_f32_gavgpool_cw_ukernel__sse_x4()
|
D | wasmsimd-x86-x4.c | 37 v128_t vsum1 = vsum0; in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4() local 52 vsum1 = wasm_f32x4_add(vsum1, vi1); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4() 69 vsum1 = wasm_f32x4_add(vsum1, vi1); in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4() 76 … vsum01 = wasm_f32x4_add(wasm_v32x4_shuffle(vsum0, vsum1, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum0, v… in xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4()
|
D | scalar-x1.c | 31 float vsum1 = 0.f; in xnn_f32_gavgpool_cw_ukernel__scalar_x1() local 37 vsum1 += i0[1]; in xnn_f32_gavgpool_cw_ukernel__scalar_x1() 50 float vout = ( (vsum0 + vsum1) + (vsum2 + vsum3) ) * vmultiplier; in xnn_f32_gavgpool_cw_ukernel__scalar_x1()
|
/external/arm-optimized-routines/networking/arm/ |
D | chksum_simd.c | 53 uint64x2_t vsum1 = { 0, 0 }; in __chksum_arm_simd() local 64 vsum1 = vpadalq_u32(vsum1, vtmp1); in __chksum_arm_simd() 73 vsum1 = vpadalq_u32(vsum1, vreinterpretq_u32_u64(vsum3)); in __chksum_arm_simd() 74 vsum0 = vpadalq_u32(vsum0, vreinterpretq_u32_u64(vsum1)); in __chksum_arm_simd()
|
/external/llvm-project/libc/AOR_v20.02/networking/arm/ |
D | chksum_simd.c | 54 uint64x2_t vsum1 = { 0, 0 }; in __chksum_arm_simd() local 65 vsum1 = vpadalq_u32(vsum1, vtmp1); in __chksum_arm_simd() 74 vsum1 = vpadalq_u32(vsum1, vreinterpretq_u32_u64(vsum3)); in __chksum_arm_simd() 75 vsum0 = vpadalq_u32(vsum0, vreinterpretq_u32_u64(vsum1)); in __chksum_arm_simd()
|
/external/libaom/libaom/av1/encoder/x86/ |
D | temporal_filter_sse2.c | 87 static int32_t xx_mask_and_hadd(__m128i vsum1, __m128i vsum2, int i) { in xx_mask_and_hadd() argument 90 veca = _mm_and_si128(vsum1, *(__m128i *)sse_bytemask_2x4[i][0]); in xx_mask_and_hadd() 146 __m128i vsum1 = _mm_setzero_si128(); in apply_temporal_filter_planewise() local 151 vsum1 = _mm_add_epi32(vsrc[i][0], vsum1); in apply_temporal_filter_planewise() 173 acc_5x5_sse[row][col + i] = xx_mask_and_hadd(vsum1, vsum2, i); in apply_temporal_filter_planewise()
|
/external/rust/crates/libz-sys/src/zlib-ng/arch/x86/ |
D | adler32_ssse3.c | 78 __m128i vsum1 = _mm_madd_epi16(v_short_sum1, dot3v); // sum 8 shorts to 4 int32_t; in adler32_ssse3() local 80 vs1 = _mm_add_epi32(vsum1, vs1); in adler32_ssse3()
|
D | adler32_avx.c | 76 __m256i vsum1 = _mm256_madd_epi16(v_short_sum1, dot3v); // sum 8 shorts to 4 int32_t; in adler32_avx2() local 78 vs1 = _mm256_add_epi32(vsum1, vs1); in adler32_avx2()
|
/external/llvm-project/llvm/test/CodeGen/NVPTX/ |
D | match.ll | 72 %vsum1 = add i32 %v1, %v2 74 %vsum3 = add i32 %vsum1, %vsum2 108 %vsum1 = add i64 %v1, %v2 110 %vsum3 = add i64 %vsum1, %vsum2
|