/external/libvpx/libvpx/vpx_dsp/x86/ |
D | fwd_txfm_sse2.c | 29 tmp = _mm_add_epi16(in0, in1); in vpx_fdct4x4_1_sse2() 54 u0 = _mm_add_epi16(in0, in1); in vpx_fdct8x8_1_sse2() 55 u1 = _mm_add_epi16(in2, in3); in vpx_fdct8x8_1_sse2() 62 sum = _mm_add_epi16(u0, u1); in vpx_fdct8x8_1_sse2() 64 in0 = _mm_add_epi16(in0, in1); in vpx_fdct8x8_1_sse2() 65 in2 = _mm_add_epi16(in2, in3); in vpx_fdct8x8_1_sse2() 66 sum = _mm_add_epi16(sum, in0); in vpx_fdct8x8_1_sse2() 69 sum = _mm_add_epi16(sum, in2); in vpx_fdct8x8_1_sse2() 100 u0 = _mm_add_epi16(in0, in1); in vpx_fdct16x16_1_sse2() 101 u1 = _mm_add_epi16(in2, in3); in vpx_fdct16x16_1_sse2() [all …]
|
D | inv_txfm_sse2.h | 139 d0 = _mm_add_epi16(in_x, d0); in recon_and_store() 148 out[0] = _mm_add_epi16(in[0], final_rounding); in round_shift_8x8() 149 out[1] = _mm_add_epi16(in[1], final_rounding); in round_shift_8x8() 150 out[2] = _mm_add_epi16(in[2], final_rounding); in round_shift_8x8() 151 out[3] = _mm_add_epi16(in[3], final_rounding); in round_shift_8x8() 152 out[4] = _mm_add_epi16(in[4], final_rounding); in round_shift_8x8() 153 out[5] = _mm_add_epi16(in[5], final_rounding); in round_shift_8x8() 154 out[6] = _mm_add_epi16(in[6], final_rounding); in round_shift_8x8() 155 out[7] = _mm_add_epi16(in[7], final_rounding); in round_shift_8x8() 198 d[0] = _mm_add_epi16(d[0], in[0]); in recon_and_store4x4_sse2() [all …]
|
D | highbd_loopfilter_sse2.c | 254 pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6, p5), _mm_add_epi16(p4, p3)); in vpx_highbd_lpf_horizontal_16_sse2() 255 pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6, q5), _mm_add_epi16(q4, q3)); in vpx_highbd_lpf_horizontal_16_sse2() 257 pixetFilter_p2p1p0 = _mm_add_epi16(p0, _mm_add_epi16(p2, p1)); in vpx_highbd_lpf_horizontal_16_sse2() 258 pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); in vpx_highbd_lpf_horizontal_16_sse2() 260 pixetFilter_q2q1q0 = _mm_add_epi16(q0, _mm_add_epi16(q2, q1)); in vpx_highbd_lpf_horizontal_16_sse2() 261 pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); in vpx_highbd_lpf_horizontal_16_sse2() 263 _mm_add_epi16(eight, _mm_add_epi16(pixelFilter_p, pixelFilter_q)); in vpx_highbd_lpf_horizontal_16_sse2() 264 pixetFilter_p2p1p0 = _mm_add_epi16( in vpx_highbd_lpf_horizontal_16_sse2() 265 four, _mm_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); in vpx_highbd_lpf_horizontal_16_sse2() 267 _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, _mm_add_epi16(p7, p0)), 4); in vpx_highbd_lpf_horizontal_16_sse2() [all …]
|
D | loopfilter_sse2.c | 392 pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6_16, p5_16), in vpx_lpf_horizontal_16_sse2() 393 _mm_add_epi16(p4_16, p3_16)); in vpx_lpf_horizontal_16_sse2() 394 pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6_16, q5_16), in vpx_lpf_horizontal_16_sse2() 395 _mm_add_epi16(q4_16, q3_16)); in vpx_lpf_horizontal_16_sse2() 397 pixetFilter_p2p1p0 = _mm_add_epi16(p0_16, _mm_add_epi16(p2_16, p1_16)); in vpx_lpf_horizontal_16_sse2() 398 pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); in vpx_lpf_horizontal_16_sse2() 400 pixetFilter_q2q1q0 = _mm_add_epi16(q0_16, _mm_add_epi16(q2_16, q1_16)); in vpx_lpf_horizontal_16_sse2() 401 pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); in vpx_lpf_horizontal_16_sse2() 403 _mm_add_epi16(eight, _mm_add_epi16(pixelFilter_p, pixelFilter_q)); in vpx_lpf_horizontal_16_sse2() 404 pixetFilter_p2p1p0 = _mm_add_epi16( in vpx_lpf_horizontal_16_sse2() [all …]
|
D | loopfilter_avx2.c | 194 pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6_16, p5_16), in vpx_lpf_horizontal_16_avx2() 195 _mm_add_epi16(p4_16, p3_16)); in vpx_lpf_horizontal_16_avx2() 196 pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6_16, q5_16), in vpx_lpf_horizontal_16_avx2() 197 _mm_add_epi16(q4_16, q3_16)); in vpx_lpf_horizontal_16_avx2() 199 pixetFilter_p2p1p0 = _mm_add_epi16(p0_16, _mm_add_epi16(p2_16, p1_16)); in vpx_lpf_horizontal_16_avx2() 200 pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); in vpx_lpf_horizontal_16_avx2() 202 pixetFilter_q2q1q0 = _mm_add_epi16(q0_16, _mm_add_epi16(q2_16, q1_16)); in vpx_lpf_horizontal_16_avx2() 203 pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); in vpx_lpf_horizontal_16_avx2() 205 _mm_add_epi16(eight, _mm_add_epi16(pixelFilter_p, pixelFilter_q)); in vpx_lpf_horizontal_16_avx2() 206 pixetFilter_p2p1p0 = _mm_add_epi16( in vpx_lpf_horizontal_16_avx2() [all …]
|
D | inv_txfm_ssse3.c | 79 out[0] = _mm_add_epi16(step1[0], step1[7]); in idct32_34_8x32_quarter_1() 80 out[1] = _mm_add_epi16(step1[1], step1[6]); in idct32_34_8x32_quarter_1() 81 out[2] = _mm_add_epi16(step1[2], step1[5]); in idct32_34_8x32_quarter_1() 82 out[3] = _mm_add_epi16(step1[3], step1[4]); in idct32_34_8x32_quarter_1() 202 step2[4] = _mm_add_epi16(step1[4], step1[5]); in idct32_135_8x32_quarter_1() 205 step2[7] = _mm_add_epi16(step1[7], step1[6]); in idct32_135_8x32_quarter_1() 208 step1[0] = _mm_add_epi16(step2[0], step2[3]); in idct32_135_8x32_quarter_1() 209 step1[1] = _mm_add_epi16(step2[0], step2[2]); in idct32_135_8x32_quarter_1() 217 out[0] = _mm_add_epi16(step1[0], step1[7]); in idct32_135_8x32_quarter_1() 218 out[1] = _mm_add_epi16(step1[1], step1[6]); in idct32_135_8x32_quarter_1() [all …]
|
D | inv_txfm_ssse3.h | 53 step2[4] = _mm_add_epi16(step1[4], step1[5]); // step2 4&7 in idct8x8_12_add_kernel_ssse3() 60 tmp[0] = _mm_add_epi16(step2[0], step2[2]); // step1 0&1 in idct8x8_12_add_kernel_ssse3() 66 tmp[0] = _mm_add_epi16(step1[3], step2[4]); // output 3&0 in idct8x8_12_add_kernel_ssse3() 67 tmp[1] = _mm_add_epi16(step1[2], step1[5]); // output 2&1 in idct8x8_12_add_kernel_ssse3() 85 step2[4] = _mm_add_epi16(step1[4], step1[5]); in idct8x8_12_add_kernel_ssse3() 88 step2[7] = _mm_add_epi16(step1[7], step1[6]); in idct8x8_12_add_kernel_ssse3() 91 step1[0] = _mm_add_epi16(step2[0], step2[3]); in idct8x8_12_add_kernel_ssse3() 92 step1[1] = _mm_add_epi16(step2[0], step2[2]); in idct8x8_12_add_kernel_ssse3() 98 io[0] = _mm_add_epi16(step1[0], step2[7]); in idct8x8_12_add_kernel_ssse3() 99 io[1] = _mm_add_epi16(step1[1], step1[6]); in idct8x8_12_add_kernel_ssse3() [all …]
|
/external/libaom/libaom/aom_dsp/x86/ |
D | highbd_loopfilter_sse2.c | 371 __m128i sum_p = _mm_add_epi16(pq[5], _mm_add_epi16(pq[4], pq[3])); in highbd_lpf_internal_14_sse2() 372 __m128i sum_lp = _mm_add_epi16(pq[0], _mm_add_epi16(pq[2], pq[1])); in highbd_lpf_internal_14_sse2() 373 sum_p = _mm_add_epi16(sum_p, sum_lp); in highbd_lpf_internal_14_sse2() 378 sum_p_0 = _mm_add_epi16(eight, _mm_add_epi16(sum_p, sum_q)); in highbd_lpf_internal_14_sse2() 379 sum_lp = _mm_add_epi16(four, _mm_add_epi16(sum_lp, sum_lq)); in highbd_lpf_internal_14_sse2() 381 flat_p[0] = _mm_add_epi16(sum_lp, _mm_add_epi16(pq[3], pq[0])); in highbd_lpf_internal_14_sse2() 382 flat_q[0] = _mm_add_epi16(sum_lp, _mm_add_epi16(q[3], q[0])); in highbd_lpf_internal_14_sse2() 384 sum_p6 = _mm_add_epi16(pq[6], pq[6]); in highbd_lpf_internal_14_sse2() 385 sum_p3 = _mm_add_epi16(pq[3], pq[3]); in highbd_lpf_internal_14_sse2() 390 work0_0 = _mm_add_epi16(_mm_add_epi16(pq[6], pq[0]), pq[1]); in highbd_lpf_internal_14_sse2() [all …]
|
D | loopfilter_sse2.c | 487 pixelFilter_p = _mm_add_epi16(p5_16, _mm_add_epi16(p4_16, p3_16)); in lpf_internal_14_dual_sse2() 488 pixelFilter_q = _mm_add_epi16(q5_16, _mm_add_epi16(q4_16, q3_16)); in lpf_internal_14_dual_sse2() 490 pixetFilter_p2p1p0 = _mm_add_epi16(p0_16, _mm_add_epi16(p2_16, p1_16)); in lpf_internal_14_dual_sse2() 491 pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); in lpf_internal_14_dual_sse2() 493 pixetFilter_q2q1q0 = _mm_add_epi16(q0_16, _mm_add_epi16(q2_16, q1_16)); in lpf_internal_14_dual_sse2() 494 pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); in lpf_internal_14_dual_sse2() 496 _mm_add_epi16(eight, _mm_add_epi16(pixelFilter_p, pixelFilter_q)); in lpf_internal_14_dual_sse2() 497 pixetFilter_p2p1p0 = _mm_add_epi16( in lpf_internal_14_dual_sse2() 498 four, _mm_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); in lpf_internal_14_dual_sse2() 500 _mm_add_epi16(pixelFilter_p, in lpf_internal_14_dual_sse2() [all …]
|
/external/libgav1/libgav1/src/dsp/x86/ |
D | cdef_sse4.cc | 103 *partial_lo = _mm_add_epi16(*partial_lo, _mm_slli_si128(v_src_16[1], 2)); in AddPartial_D0_D4() 105 *partial_hi = _mm_add_epi16(*partial_hi, _mm_srli_si128(v_src_16[1], 14)); in AddPartial_D0_D4() 108 *partial_lo = _mm_add_epi16(*partial_lo, _mm_slli_si128(v_src_16[2], 4)); in AddPartial_D0_D4() 110 *partial_hi = _mm_add_epi16(*partial_hi, _mm_srli_si128(v_src_16[2], 12)); in AddPartial_D0_D4() 113 *partial_lo = _mm_add_epi16(*partial_lo, _mm_slli_si128(v_src_16[3], 6)); in AddPartial_D0_D4() 115 *partial_hi = _mm_add_epi16(*partial_hi, _mm_srli_si128(v_src_16[3], 10)); in AddPartial_D0_D4() 118 *partial_lo = _mm_add_epi16(*partial_lo, _mm_slli_si128(v_src_16[4], 8)); in AddPartial_D0_D4() 120 *partial_hi = _mm_add_epi16(*partial_hi, _mm_srli_si128(v_src_16[4], 8)); in AddPartial_D0_D4() 123 *partial_lo = _mm_add_epi16(*partial_lo, _mm_slli_si128(v_src_16[5], 10)); in AddPartial_D0_D4() 125 *partial_hi = _mm_add_epi16(*partial_hi, _mm_srli_si128(v_src_16[5], 6)); in AddPartial_D0_D4() [all …]
|
D | intra_edge_sse4.cc | 61 _mm_add_epi16(outers4_lo, _mm_srli_si128(outers4_lo, 4)); in ComputeKernel1Store12() 63 _mm_add_epi16(outers4_hi, _mm_srli_si128(outers4_hi, 4)); in ComputeKernel1Store12() 66 _mm_add_epi16(partial_sums_lo, centers8_lo), 4); in ComputeKernel1Store12() 68 _mm_add_epi16(partial_sums_hi, centers8_hi), 4); in ComputeKernel1Store12() 89 _mm_add_epi16(outers_lo, _mm_slli_epi16(outers_lo, 2)); in ComputeKernel2Store12() 91 _mm_add_epi16(outers_hi, _mm_slli_epi16(outers_hi, 2)); in ComputeKernel2Store12() 93 const __m128i centers6_lo = _mm_add_epi16(_mm_slli_epi16(centers_lo, 1), in ComputeKernel2Store12() 95 const __m128i centers6_hi = _mm_add_epi16(_mm_slli_epi16(centers_hi, 1), in ComputeKernel2Store12() 99 _mm_add_epi16(outers5_lo, _mm_srli_si128(outers5_lo, 4)); in ComputeKernel2Store12() 102 _mm_add_epi16(centers6_lo, partial_sums_lo), 4); in ComputeKernel2Store12() [all …]
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-sse41-c24-acc2.c | 74 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 75 __m128i vacc0x89ABCDEF = _mm_add_epi16(vxi0x89ABCDEF, vxi1x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 76 __m128i vacc0xGHIJKLMN = _mm_add_epi16(vxi0xGHIJKLMN, vxi1xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 77 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 78 __m128i vacc1x89ABCDEF = _mm_add_epi16(vxi2x89ABCDEF, vxi3x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 79 __m128i vacc1xGHIJKLMN = _mm_add_epi16(vxi2xGHIJKLMN, vxi3xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 81 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 82 vacc0x89ABCDEF = _mm_add_epi16(vacc0x89ABCDEF, vxi4x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 83 vacc0xGHIJKLMN = _mm_add_epi16(vacc0xGHIJKLMN, vxi4xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 84 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() [all …]
|
D | 7p7x-minmax-sse41-c16-acc2.c | 67 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 68 __m128i vacc0x89ABCDEF = _mm_add_epi16(vxi0x89ABCDEF, vxi1x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 69 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 70 __m128i vacc1x89ABCDEF = _mm_add_epi16(vxi2x89ABCDEF, vxi3x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 72 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 73 vacc0x89ABCDEF = _mm_add_epi16(vacc0x89ABCDEF, vxi4x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 74 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 75 vacc1x89ABCDEF = _mm_add_epi16(vacc1x89ABCDEF, vxi5x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 76 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 77 vacc0x89ABCDEF = _mm_add_epi16(vacc0x89ABCDEF, vxi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() [all …]
|
D | 7p7x-minmax-sse2-c24-acc2.c | 95 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 96 __m128i vacc0x89ABCDEF = _mm_add_epi16(vxi0x89ABCDEF, vxi1x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 97 __m128i vacc0xGHIJKLMN = _mm_add_epi16(vxi0xGHIJKLMN, vxi1xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 98 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 99 __m128i vacc1x89ABCDEF = _mm_add_epi16(vxi2x89ABCDEF, vxi3x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 100 __m128i vacc1xGHIJKLMN = _mm_add_epi16(vxi2xGHIJKLMN, vxi3xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 102 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 103 vacc0x89ABCDEF = _mm_add_epi16(vacc0x89ABCDEF, vxi4x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 104 vacc0xGHIJKLMN = _mm_add_epi16(vacc0xGHIJKLMN, vxi4xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 105 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() [all …]
|
D | 7p7x-minmax-ssse3-c24-acc2.c | 95 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 96 __m128i vacc0x89ABCDEF = _mm_add_epi16(vxi0x89ABCDEF, vxi1x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 97 __m128i vacc0xGHIJKLMN = _mm_add_epi16(vxi0xGHIJKLMN, vxi1xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 98 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 99 __m128i vacc1x89ABCDEF = _mm_add_epi16(vxi2x89ABCDEF, vxi3x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 100 __m128i vacc1xGHIJKLMN = _mm_add_epi16(vxi2xGHIJKLMN, vxi3xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 102 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 103 vacc0x89ABCDEF = _mm_add_epi16(vacc0x89ABCDEF, vxi4x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 104 vacc0xGHIJKLMN = _mm_add_epi16(vacc0xGHIJKLMN, vxi4xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 105 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() [all …]
|
D | 7p7x-minmax-ssse3-c16-acc2.c | 81 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 82 __m128i vacc0x89ABCDEF = _mm_add_epi16(vxi0x89ABCDEF, vxi1x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 83 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 84 __m128i vacc1x89ABCDEF = _mm_add_epi16(vxi2x89ABCDEF, vxi3x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 86 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 87 vacc0x89ABCDEF = _mm_add_epi16(vacc0x89ABCDEF, vxi4x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 88 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 89 vacc1x89ABCDEF = _mm_add_epi16(vacc1x89ABCDEF, vxi5x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 90 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 91 vacc0x89ABCDEF = _mm_add_epi16(vacc0x89ABCDEF, vxi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() [all …]
|
D | 7p7x-minmax-sse41-c8-acc2.c | 60 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 61 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 63 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 64 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 65 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 68 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 106 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 107 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 109 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 110 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() [all …]
|
D | 7p7x-minmax-sse2-c16-acc2.c | 81 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 82 __m128i vacc0x89ABCDEF = _mm_add_epi16(vxi0x89ABCDEF, vxi1x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 83 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 84 __m128i vacc1x89ABCDEF = _mm_add_epi16(vxi2x89ABCDEF, vxi3x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 86 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 87 vacc0x89ABCDEF = _mm_add_epi16(vacc0x89ABCDEF, vxi4x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 88 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 89 vacc1x89ABCDEF = _mm_add_epi16(vacc1x89ABCDEF, vxi5x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 90 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 91 vacc0x89ABCDEF = _mm_add_epi16(vacc0x89ABCDEF, vxi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() [all …]
|
D | 7x-minmax-sse41-c24-acc2.c | 91 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 92 __m128i vacc0x89ABCDEF = _mm_add_epi16(vxi0x89ABCDEF, vxi1x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 93 __m128i vacc0xGHIJKLMN = _mm_add_epi16(vxi0xGHIJKLMN, vxi1xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 94 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 95 __m128i vacc1x89ABCDEF = _mm_add_epi16(vxi2x89ABCDEF, vxi3x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 96 __m128i vacc1xGHIJKLMN = _mm_add_epi16(vxi2xGHIJKLMN, vxi3xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 98 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 99 vacc0x89ABCDEF = _mm_add_epi16(vacc0x89ABCDEF, vxi4x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 100 vacc0xGHIJKLMN = _mm_add_epi16(vacc0xGHIJKLMN, vxi4xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 101 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() [all …]
|
/external/libavc/common/x86/ |
D | ih264_deblk_luma_ssse3.c | 219 Alpha_8x16 = _mm_add_epi16(Alpha_8x16, const_val2_16x8); in ih264_deblk_luma_vert_bs4_ssse3() 266 temp1 = _mm_add_epi16(p0_8x16, q1_8x16); in ih264_deblk_luma_vert_bs4_ssse3() 267 temp2 = _mm_add_epi16(p1_8x16, q0_8x16); in ih264_deblk_luma_vert_bs4_ssse3() 268 temp5 = _mm_add_epi16(temp1, const_val2_16x8); in ih264_deblk_luma_vert_bs4_ssse3() 269 temp6 = _mm_add_epi16(temp2, const_val2_16x8); in ih264_deblk_luma_vert_bs4_ssse3() 272 temp1 = _mm_add_epi16(temp5, temp3); in ih264_deblk_luma_vert_bs4_ssse3() 273 temp2 = _mm_add_epi16(temp6, temp4); in ih264_deblk_luma_vert_bs4_ssse3() 278 temp6 = _mm_add_epi16(temp6, p0_8x16); in ih264_deblk_luma_vert_bs4_ssse3() 279 temp5 = _mm_add_epi16(temp5, q0_8x16); in ih264_deblk_luma_vert_bs4_ssse3() 280 temp1 = _mm_add_epi16(temp6, p2_8x16); in ih264_deblk_luma_vert_bs4_ssse3() [all …]
|
D | ih264_luma_intra_pred_filters_ssse3.c | 346 res1_8x16b = _mm_add_epi16(top_8x16b, top_sh_8x16b); in ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3() 349 res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b); in ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3() 350 res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b); in ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3() 431 …res1_8x16b = _mm_add_epi16(top_left_8x16b, top_left_sh_8x16b); //l3+l2 l2+l1 l1+l0 l0+tl… in ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3() 435 res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b); in ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3() 436 …res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b); //l3+2*l2+l1+2 l2+2*l1+l0… in ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3() 519 …w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 … in ih264_intra_pred_luma_4x4_mode_vert_r_ssse3() 524 w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l2+2*l1+l0 l1+2*l0+tl ... in ih264_intra_pred_luma_4x4_mode_vert_r_ssse3() 525 w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b); in ih264_intra_pred_luma_4x4_mode_vert_r_ssse3() 612 …w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l3+l2 l2+l1 l1+l0 l0+tl tl+t0 … in ih264_intra_pred_luma_4x4_mode_horz_d_ssse3() [all …]
|
D | ih264_inter_pred_filters_ssse3.c | 279 res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b); in ih264_inter_pred_luma_horz_ssse3() 280 res_r0r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0r1_t3_8x16b); in ih264_inter_pred_luma_horz_ssse3() 281 …res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t3_8x16b); //a0*c0+a1*c1+a2*c2+a3*c3… in ih264_inter_pred_luma_horz_ssse3() 358 res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b); in ih264_inter_pred_luma_horz_ssse3() 359 res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b); in ih264_inter_pred_luma_horz_ssse3() 360 res_r0_t3_8x16b = _mm_add_epi16(res_r0_t3_8x16b, const_val16_8x16b); in ih264_inter_pred_luma_horz_ssse3() 361 res_r1_t3_8x16b = _mm_add_epi16(res_r1_t3_8x16b, const_val16_8x16b); in ih264_inter_pred_luma_horz_ssse3() 362 res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b); in ih264_inter_pred_luma_horz_ssse3() 363 res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b); in ih264_inter_pred_luma_horz_ssse3() 435 res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b); in ih264_inter_pred_luma_horz_ssse3() [all …]
|
/external/libhevc/common/x86/ |
D | ihevc_inter_pred_filters_ssse3_intr.c | 356 res_temp5_8x16b = _mm_add_epi16(res_temp1_8x16b, res_temp2_8x16b); in ihevc_inter_pred_luma_horz_ssse3() 357 res_temp6_8x16b = _mm_add_epi16(res_temp3_8x16b, res_temp4_8x16b); in ihevc_inter_pred_luma_horz_ssse3() 358 res_temp5_8x16b = _mm_add_epi16(res_temp5_8x16b, res_temp6_8x16b); in ihevc_inter_pred_luma_horz_ssse3() 389 res_temp15_8x16b = _mm_add_epi16(res_temp11_8x16b, res_temp12_8x16b); in ihevc_inter_pred_luma_horz_ssse3() 390 res_temp16_8x16b = _mm_add_epi16(res_temp13_8x16b, res_temp14_8x16b); in ihevc_inter_pred_luma_horz_ssse3() 391 res_temp15_8x16b = _mm_add_epi16(res_temp15_8x16b, res_temp16_8x16b); in ihevc_inter_pred_luma_horz_ssse3() 445 res_temp5_8x16b = _mm_add_epi16(res_temp1_8x16b, res_temp2_8x16b); in ihevc_inter_pred_luma_horz_ssse3() 446 res_temp6_8x16b = _mm_add_epi16(res_temp3_8x16b, res_temp4_8x16b); in ihevc_inter_pred_luma_horz_ssse3() 447 res_temp5_8x16b = _mm_add_epi16(res_temp5_8x16b, res_temp6_8x16b); in ihevc_inter_pred_luma_horz_ssse3() 483 res_temp15_8x16b = _mm_add_epi16(res_temp11_8x16b, res_temp12_8x16b); in ihevc_inter_pred_luma_horz_ssse3() [all …]
|
D | ihevc_intra_pred_filters_ssse3_intr.c | 533 src_temp1 = _mm_add_epi16(src_temp1, src_temp2); in ihevc_intra_pred_ref_filtering_ssse3() 534 src_temp1 = _mm_add_epi16(src_temp1, src_temp3); in ihevc_intra_pred_ref_filtering_ssse3() 535 src_temp1 = _mm_add_epi16(src_temp1, const_value_8x16); in ihevc_intra_pred_ref_filtering_ssse3() 549 src_temp4 = _mm_add_epi16(src_temp4, src_temp5); in ihevc_intra_pred_ref_filtering_ssse3() 550 src_temp4 = _mm_add_epi16(src_temp4, src_temp6); in ihevc_intra_pred_ref_filtering_ssse3() 551 src_temp4 = _mm_add_epi16(src_temp4, const_value_8x16); in ihevc_intra_pred_ref_filtering_ssse3() 720 row1_16x8b = _mm_add_epi16(row_16x8b, const_temp8_4x32b); in ihevc_intra_pred_luma_planar_ssse3() 722 row2_16x8b = _mm_add_epi16(row1_16x8b, const_temp8_4x32b); in ihevc_intra_pred_luma_planar_ssse3() 724 row3_16x8b = _mm_add_epi16(row2_16x8b, const_temp8_4x32b); in ihevc_intra_pred_luma_planar_ssse3() 745 res_temp_8x16b = _mm_add_epi16(res_temp_8x16b, res_temp4_8x16b); in ihevc_intra_pred_luma_planar_ssse3() [all …]
|
/external/libavc/encoder/x86/ |
D | ih264e_intra_modes_eval_ssse3.c | 229 sad_8x16b = _mm_add_epi16(sad_8x16b, sad1_8x16b); in ih264e_evaluate_intra16x16_modes_ssse3() 280 sad_8x16b = _mm_add_epi16(sad_8x16b, sad1_8x16b); in ih264e_evaluate_intra16x16_modes_ssse3() 353 sad_8x16b = _mm_add_epi16(sad_8x16b, sad1_8x16b); in ih264e_evaluate_intra16x16_modes_ssse3() 601 …temp1_8x16b = _mm_add_epi16(temp1_8x16b, temp2_8x16b); //l3+l3 l3+l2 l2+l1... … in ih264e_evaluate_intra_4x4_modes_ssse3() 604 …temp1_8x16b = _mm_add_epi16(temp1_8x16b, temp2_8x16b); //4*l3 l3+2*l3+l2 l3+2*l2+l1...… in ih264e_evaluate_intra_4x4_modes_ssse3() 606 …temp1_8x16b = _mm_add_epi16(const_2_8x16b, temp1_8x16b); //4*l3+2 3*l3+l2+2 l3+2*l2+l1+2.… in ih264e_evaluate_intra_4x4_modes_ssse3() 617 …temp2_8x16b = _mm_add_epi16(temp2_8x16b, temp3_8x16b); //t1+t2 t2+t3... t6+t7 … in ih264e_evaluate_intra_4x4_modes_ssse3() 619 …temp2_8x16b = _mm_add_epi16(temp2_8x16b, temp3_8x16b); //t1+2*t2+t3 t2+2*t3+t4.. t6+2*t7+… in ih264e_evaluate_intra_4x4_modes_ssse3() 621 …temp2_8x16b = _mm_add_epi16(const_2_8x16b, temp2_8x16b); //t1+2*t2+t3+2 t2+2*t3+t4+2 t3+2*t… in ih264e_evaluate_intra_4x4_modes_ssse3() 885 sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b); in ih264e_evaluate_intra_chroma_modes_ssse3() [all …]
|