/external/libavc/common/x86/

ih264_weighted_pred_sse42.c (all matches in ih264_weighted_pred_luma_sse42()):
    419  y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b);
    420  y_2_8x16b = _mm_mullo_epi16(y_2_8x16b, wt_8x16b);
    463  y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b);
    464  y_1_8x16b = _mm_mullo_epi16(y_1_8x16b, wt_8x16b);
    465  y_2_8x16b = _mm_mullo_epi16(y_2_8x16b, wt_8x16b);
    466  y_3_8x16b = _mm_mullo_epi16(y_3_8x16b, wt_8x16b);
    523  y_0L_8x16b = _mm_mullo_epi16(y_0L_8x16b, wt_8x16b);
    524  y_0H_8x16b = _mm_mullo_epi16(y_0H_8x16b, wt_8x16b);
    525  y_1L_8x16b = _mm_mullo_epi16(y_1L_8x16b, wt_8x16b);
    526  y_1H_8x16b = _mm_mullo_epi16(y_1H_8x16b, wt_8x16b);
    [all …]
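The pattern above is H.264 explicit weighted prediction: luma samples are widened to 16-bit lanes, multiplied by a per-reference weight, then rounded, shifted, offset, and packed back to bytes. A minimal sketch of one 8-pixel step, assuming 8-bit samples; the parameter names (wt, ofst, log2_denom) are illustrative, not libavc's actual interface:

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>

    /* dst = clip(((src * wt + 2^(log2_denom-1)) >> log2_denom) + ofst) */
    static void weighted_pred_8px(const uint8_t *src, uint8_t *dst,
                                  int16_t wt, int16_t ofst, int log2_denom) {
      const __m128i zero = _mm_setzero_si128();
      const __m128i wt_8x16b = _mm_set1_epi16(wt);
      const __m128i rnd_8x16b =
          _mm_set1_epi16((int16_t)(log2_denom > 0 ? 1 << (log2_denom - 1) : 0));
      const __m128i ofst_8x16b = _mm_set1_epi16(ofst);

      __m128i y = _mm_loadl_epi64((const __m128i *)src);
      y = _mm_unpacklo_epi8(y, zero);     /* widen 8 samples to 16-bit lanes */
      y = _mm_mullo_epi16(y, wt_8x16b);   /* sample * weight */
      y = _mm_add_epi16(y, rnd_8x16b);    /* rounding term */
      y = _mm_srai_epi16(y, log2_denom);  /* arithmetic shift: weights may be negative */
      y = _mm_add_epi16(y, ofst_8x16b);   /* add offset */
      _mm_storel_epi64((__m128i *)dst, _mm_packus_epi16(y, y));  /* clip to [0,255] */
    }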
/external/XNNPACK/src/qs8-vadd/gen/

minmax-sse41-mul16-ld64-x32.c (all matches in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()):
     51  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     52  const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo);
     55  const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo);
     56  const __m128i vyprod89ABCDEFlo = _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_lo);
     59  const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo);
     60  const __m128i vyprodGHIJKLMNlo = _mm_mullo_epi16(vyGHIJKLMN, vy_multiplier_lo);
     63  const __m128i vxprodOPQRSTUVlo = _mm_mullo_epi16(vxOPQRSTUV, vx_multiplier_lo);
     64  const __m128i vyprodOPQRSTUVlo = _mm_mullo_epi16(vyOPQRSTUV, vy_multiplier_lo);
     66  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     67  … vyprod01234567hi = _mm_add_epi16(vyprod01234567hi, _mm_mullo_epi16(vy01234567, vy_multiplier_hi));
    [all …]

minmax-sse2-mul16-ld64-x32.c (all matches in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()):
     59  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     60  const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo);
     63  const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo);
     64  const __m128i vyprod89ABCDEFlo = _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_lo);
     67  const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo);
     68  const __m128i vyprodGHIJKLMNlo = _mm_mullo_epi16(vyGHIJKLMN, vy_multiplier_lo);
     71  const __m128i vxprodOPQRSTUVlo = _mm_mullo_epi16(vxOPQRSTUV, vx_multiplier_lo);
     72  const __m128i vyprodOPQRSTUVlo = _mm_mullo_epi16(vyOPQRSTUV, vy_multiplier_lo);
     74  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     75  … vyprod01234567hi = _mm_add_epi16(vyprod01234567hi, _mm_mullo_epi16(vy01234567, vy_multiplier_hi));
    [all …]

minmax-sse41-mul16-ld64-x24.c (all matches in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()):
     49  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     50  const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo);
     53  const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo);
     54  const __m128i vyprod89ABCDEFlo = _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_lo);
     57  const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo);
     58  const __m128i vyprodGHIJKLMNlo = _mm_mullo_epi16(vyGHIJKLMN, vy_multiplier_lo);
     60  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     61  … vyprod01234567hi = _mm_add_epi16(vyprod01234567hi, _mm_mullo_epi16(vy01234567, vy_multiplier_hi));
     62  … vxprod89ABCDEFhi = _mm_add_epi16(vxprod89ABCDEFhi, _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_hi));
     63  … vyprod89ABCDEFhi = _mm_add_epi16(vyprod89ABCDEFhi, _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_hi));
    [all …]

minmax-sse2-mul16-ld64-x24.c (all matches in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()):
     55  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     56  const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo);
     59  const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo);
     60  const __m128i vyprod89ABCDEFlo = _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_lo);
     63  const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo);
     64  const __m128i vyprodGHIJKLMNlo = _mm_mullo_epi16(vyGHIJKLMN, vy_multiplier_lo);
     66  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     67  … vyprod01234567hi = _mm_add_epi16(vyprod01234567hi, _mm_mullo_epi16(vy01234567, vy_multiplier_hi));
     68  … vxprod89ABCDEFhi = _mm_add_epi16(vxprod89ABCDEFhi, _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_hi));
     69  … vyprod89ABCDEFhi = _mm_add_epi16(vyprod89ABCDEFhi, _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_hi));
    [all …]

minmax-sse2-mul16-ld64-x16.c (all matches in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()):
     51  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     52  const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo);
     55  const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo);
     56  const __m128i vyprod89ABCDEFlo = _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_lo);
     58  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     59  … vyprod01234567hi = _mm_add_epi16(vyprod01234567hi, _mm_mullo_epi16(vy01234567, vy_multiplier_hi));
     60  … vxprod89ABCDEFhi = _mm_add_epi16(vxprod89ABCDEFhi, _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_hi));
     61  … vyprod89ABCDEFhi = _mm_add_epi16(vyprod89ABCDEFhi, _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_hi));
    114  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
    115  const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo);
    [all …]

minmax-sse41-mul16-ld64-x16.c (all matches in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()):
     47  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     48  const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo);
     51  const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo);
     52  const __m128i vyprod89ABCDEFlo = _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_lo);
     54  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     55  … vyprod01234567hi = _mm_add_epi16(vyprod01234567hi, _mm_mullo_epi16(vy01234567, vy_multiplier_hi));
     56  … vxprod89ABCDEFhi = _mm_add_epi16(vxprod89ABCDEFhi, _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_hi));
     57  … vyprod89ABCDEFhi = _mm_add_epi16(vyprod89ABCDEFhi, _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_hi));
    108  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
    109  const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo);
    [all …]

minmax-sse41-mul16-ld64-x8.c (all matches in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()):
     45  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     46  const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo);
     48  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     49  … vyprod01234567hi = _mm_add_epi16(vyprod01234567hi, _mm_mullo_epi16(vy01234567, vy_multiplier_hi));
     85  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     86  const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo);
     88  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     89  … vyprod01234567hi = _mm_add_epi16(vyprod01234567hi, _mm_mullo_epi16(vy01234567, vy_multiplier_hi));

minmax-sse2-mul16-ld64-x8.c (all matches in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()):
     47  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     48  const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo);
     50  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     51  … vyprod01234567hi = _mm_add_epi16(vyprod01234567hi, _mm_mullo_epi16(vy01234567, vy_multiplier_hi));
     89  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     90  const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo);
     92  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     93  … vyprod01234567hi = _mm_add_epi16(vyprod01234567hi, _mm_mullo_epi16(vy01234567, vy_multiplier_hi));
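Every mul16 variant above leans on the same trick: SSE2/SSE4.1 have no 16x32-bit widening multiply, so the 32-bit requantization multiplier is split into 16-bit halves (the vx_multiplier_lo / vx_multiplier_hi constants) and the 32-bit product is reassembled from _mm_mullo_epi16 and _mm_mulhi_epu16 partial products, as in the vxprod…lo / vxprod…hi pairs shown. A minimal sketch of the identity for unsigned 16-bit lanes follows; the signed qs8 kernels add a sign-correction step that is omitted here, and the same reconstruction appears in its plain unsigned form in the qu8-vadd kernel further down:

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>

    /* x*m (mod 2^32) = low16(x*m_lo)
     *                | (high16(x*m_lo) + low16(x*m_hi)) << 16 */
    static void mul16_widen(const uint16_t x[8], uint32_t m, uint32_t out[8]) {
      const __m128i vx = _mm_loadu_si128((const __m128i *)x);
      const __m128i vm_lo = _mm_set1_epi16((int16_t)(m & 0xFFFF));
      const __m128i vm_hi = _mm_set1_epi16((int16_t)(m >> 16));

      const __m128i vprodlo = _mm_mullo_epi16(vx, vm_lo);  /* low 16 bits of x*m_lo */
      __m128i vprodhi = _mm_mulhi_epu16(vx, vm_lo);        /* high 16 bits of x*m_lo */
      vprodhi = _mm_add_epi16(vprodhi, _mm_mullo_epi16(vx, vm_hi));  /* + low16(x*m_hi) */

      /* Interleave the 16-bit halves into 32-bit product lanes. */
      _mm_storeu_si128((__m128i *)&out[0], _mm_unpacklo_epi16(vprodlo, vprodhi));
      _mm_storeu_si128((__m128i *)&out[4], _mm_unpackhi_epi16(vprodlo, vprodhi));
    }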
/external/libvpx/libvpx/vp8/common/x86/

bilinear_filter_sse2.c:
     47  const __m128i a_lo_filtered = _mm_mullo_epi16(a_lo, hfilter_0);          in horizontal_16x16()
     48  const __m128i a_hi_filtered = _mm_mullo_epi16(a_hi, hfilter_0);          in horizontal_16x16()
     53  const __m128i b_lo_filtered = _mm_mullo_epi16(b_lo, hfilter_1);          in horizontal_16x16()
     54  const __m128i b_hi_filtered = _mm_mullo_epi16(b_hi, hfilter_1);          in horizontal_16x16()
    100  const __m128i row_0_lo_filtered = _mm_mullo_epi16(row_0_lo, vfilter_0);  in vertical_16x16()
    101  const __m128i row_0_hi_filtered = _mm_mullo_epi16(row_0_hi, vfilter_0);  in vertical_16x16()
    105  const __m128i row_1_lo_filtered = _mm_mullo_epi16(row_1_lo, vfilter_1);  in vertical_16x16()
    106  const __m128i row_1_hi_filtered = _mm_mullo_epi16(row_1_hi, vfilter_1);  in vertical_16x16()
    172  const __m128i a_filtered = _mm_mullo_epi16(a_u16, hfilter_0);            in horizontal_8xN()
    173  const __m128i b_filtered = _mm_mullo_epi16(b_u16, hfilter_1);            in horizontal_8xN()
    [all …]
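bilinear_filter_sse2.c is a two-tap filter: each pixel and its right (or lower) neighbour are multiplied by the two filter coefficients in 16-bit lanes, summed, rounded, and shifted by the filter precision. A minimal sketch of one horizontal 8-pixel step, assuming VP8's 7-bit filter convention (the two taps sum to 128); names are illustrative, not libvpx's interface:

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>

    /* dst[i] = (src[i]*f0 + src[i+1]*f1 + 64) >> 7, with f0 + f1 == 128 */
    static void bilinear_h_8px(const uint8_t *src, uint8_t *dst,
                               int16_t f0, int16_t f1) {
      const __m128i zero = _mm_setzero_si128();
      const __m128i hfilter_0 = _mm_set1_epi16(f0);
      const __m128i hfilter_1 = _mm_set1_epi16(f1);

      /* Load pixels 0..7 and their right neighbours 1..8, widened to 16 bits. */
      const __m128i a = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)src), zero);
      const __m128i b = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(src + 1)), zero);

      const __m128i a_filtered = _mm_mullo_epi16(a, hfilter_0);  /* tap 0 */
      const __m128i b_filtered = _mm_mullo_epi16(b, hfilter_1);  /* tap 1 */
      __m128i sum = _mm_add_epi16(a_filtered, b_filtered);
      sum = _mm_add_epi16(sum, _mm_set1_epi16(64));  /* rounding: 1 << (7 - 1) */
      sum = _mm_srli_epi16(sum, 7);                  /* filter shift */
      _mm_storel_epi64((__m128i *)dst, _mm_packus_epi16(sum, sum));
    }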
/external/XNNPACK/src/qs8-dwconv/gen/

up24x9-minmax-sse41-mul16.c (all matches in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()):
    106  const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567);
    108  const __m128i vp0x89ABCDEFlo = _mm_mullo_epi16(vxi0x89ABCDEF, vxk0x89ABCDEF);
    110  const __m128i vp0xGHIJKLMNlo = _mm_mullo_epi16(vxi0xGHIJKLMN, vxk0xGHIJKLMN);
    135  const __m128i vp1x01234567lo = _mm_mullo_epi16(vxi1x01234567, vxk1x01234567);
    137  const __m128i vp1x89ABCDEFlo = _mm_mullo_epi16(vxi1x89ABCDEF, vxk1x89ABCDEF);
    139  const __m128i vp1xGHIJKLMNlo = _mm_mullo_epi16(vxi1xGHIJKLMN, vxk1xGHIJKLMN);
    164  const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567);
    166  const __m128i vp2x89ABCDEFlo = _mm_mullo_epi16(vxi2x89ABCDEF, vxk2x89ABCDEF);
    168  const __m128i vp2xGHIJKLMNlo = _mm_mullo_epi16(vxi2xGHIJKLMN, vxk2xGHIJKLMN);
    193  const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567);
    [all …]

up16x9-minmax-sse41-mul16.c (all matches in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()):
    100  const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567);
    102  const __m128i vp0x89ABCDEFlo = _mm_mullo_epi16(vxi0x89ABCDEF, vxk0x89ABCDEF);
    121  const __m128i vp1x01234567lo = _mm_mullo_epi16(vxi1x01234567, vxk1x01234567);
    123  const __m128i vp1x89ABCDEFlo = _mm_mullo_epi16(vxi1x89ABCDEF, vxk1x89ABCDEF);
    142  const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567);
    144  const __m128i vp2x89ABCDEFlo = _mm_mullo_epi16(vxi2x89ABCDEF, vxk2x89ABCDEF);
    163  const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567);
    165  const __m128i vp3x89ABCDEFlo = _mm_mullo_epi16(vxi3x89ABCDEF, vxk3x89ABCDEF);
    184  const __m128i vp4x01234567lo = _mm_mullo_epi16(vxi4x01234567, vxk4x01234567);
    186  const __m128i vp4x89ABCDEFlo = _mm_mullo_epi16(vxi4x89ABCDEF, vxk4x89ABCDEF);
    [all …]

up8x9-minmax-sse41-mul16.c (all matches in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()):
     94  const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567);
    107  const __m128i vp1x01234567lo = _mm_mullo_epi16(vxi1x01234567, vxk1x01234567);
    120  const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567);
    133  const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567);
    146  const __m128i vp4x01234567lo = _mm_mullo_epi16(vxi4x01234567, vxk4x01234567);
    159  const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567);
    172  const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567);
    185  const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567);
    198  const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567);
    261  const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567);
    [all …]

up16x9-minmax-ssse3-mul16.c (all matches in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()):
    100  const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567);
    102  const __m128i vp0x89ABCDEFlo = _mm_mullo_epi16(vxi0x89ABCDEF, vxk0x89ABCDEF);
    121  const __m128i vp1x01234567lo = _mm_mullo_epi16(vxi1x01234567, vxk1x01234567);
    123  const __m128i vp1x89ABCDEFlo = _mm_mullo_epi16(vxi1x89ABCDEF, vxk1x89ABCDEF);
    142  const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567);
    144  const __m128i vp2x89ABCDEFlo = _mm_mullo_epi16(vxi2x89ABCDEF, vxk2x89ABCDEF);
    163  const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567);
    165  const __m128i vp3x89ABCDEFlo = _mm_mullo_epi16(vxi3x89ABCDEF, vxk3x89ABCDEF);
    184  const __m128i vp4x01234567lo = _mm_mullo_epi16(vxi4x01234567, vxk4x01234567);
    186  const __m128i vp4x89ABCDEFlo = _mm_mullo_epi16(vxi4x89ABCDEF, vxk4x89ABCDEF);
    [all …]

up16x9-minmax-sse2-mul16.c (all matches in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()):
    100  const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567);
    102  const __m128i vp0x89ABCDEFlo = _mm_mullo_epi16(vxi0x89ABCDEF, vxk0x89ABCDEF);
    121  const __m128i vp1x01234567lo = _mm_mullo_epi16(vxi1x01234567, vxk1x01234567);
    123  const __m128i vp1x89ABCDEFlo = _mm_mullo_epi16(vxi1x89ABCDEF, vxk1x89ABCDEF);
    142  const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567);
    144  const __m128i vp2x89ABCDEFlo = _mm_mullo_epi16(vxi2x89ABCDEF, vxk2x89ABCDEF);
    163  const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567);
    165  const __m128i vp3x89ABCDEFlo = _mm_mullo_epi16(vxi3x89ABCDEF, vxk3x89ABCDEF);
    184  const __m128i vp4x01234567lo = _mm_mullo_epi16(vxi4x01234567, vxk4x01234567);
    186  const __m128i vp4x89ABCDEFlo = _mm_mullo_epi16(vxi4x89ABCDEF, vxk4x89ABCDEF);
    [all …]

up24x9-minmax-ssse3-mul16.c (all matches in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()):
    106  const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567);
    108  const __m128i vp0x89ABCDEFlo = _mm_mullo_epi16(vxi0x89ABCDEF, vxk0x89ABCDEF);
    110  const __m128i vp0xGHIJKLMNlo = _mm_mullo_epi16(vxi0xGHIJKLMN, vxk0xGHIJKLMN);
    135  const __m128i vp1x01234567lo = _mm_mullo_epi16(vxi1x01234567, vxk1x01234567);
    137  const __m128i vp1x89ABCDEFlo = _mm_mullo_epi16(vxi1x89ABCDEF, vxk1x89ABCDEF);
    139  const __m128i vp1xGHIJKLMNlo = _mm_mullo_epi16(vxi1xGHIJKLMN, vxk1xGHIJKLMN);
    164  const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567);
    166  const __m128i vp2x89ABCDEFlo = _mm_mullo_epi16(vxi2x89ABCDEF, vxk2x89ABCDEF);
    168  const __m128i vp2xGHIJKLMNlo = _mm_mullo_epi16(vxi2xGHIJKLMN, vxk2xGHIJKLMN);
    193  const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567);
    [all …]

up24x9-minmax-sse2-mul16.c (all matches in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()):
    106  const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567);
    108  const __m128i vp0x89ABCDEFlo = _mm_mullo_epi16(vxi0x89ABCDEF, vxk0x89ABCDEF);
    110  const __m128i vp0xGHIJKLMNlo = _mm_mullo_epi16(vxi0xGHIJKLMN, vxk0xGHIJKLMN);
    135  const __m128i vp1x01234567lo = _mm_mullo_epi16(vxi1x01234567, vxk1x01234567);
    137  const __m128i vp1x89ABCDEFlo = _mm_mullo_epi16(vxi1x89ABCDEF, vxk1x89ABCDEF);
    139  const __m128i vp1xGHIJKLMNlo = _mm_mullo_epi16(vxi1xGHIJKLMN, vxk1xGHIJKLMN);
    164  const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567);
    166  const __m128i vp2x89ABCDEFlo = _mm_mullo_epi16(vxi2x89ABCDEF, vxk2x89ABCDEF);
    168  const __m128i vp2xGHIJKLMNlo = _mm_mullo_epi16(vxi2xGHIJKLMN, vxk2xGHIJKLMN);
    193  const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567);
    [all …]
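Each dwconv tap above follows one scheme: sign-extend 8-bit input and kernel bytes to 16-bit lanes (the vxi…/vxk… vectors), multiply, and widen the 16-bit products into 32-bit accumulators by interleaving the _mm_mullo_epi16 / _mm_mulhi_epi16 halves. A minimal sketch of a single tap over 8 channels, using SSE4.1's _mm_cvtepi8_epi16 as the sse41 variants do; names are illustrative, not XNNPACK's interface:

    #include <smmintrin.h>  /* SSE4.1 for _mm_cvtepi8_epi16 */
    #include <stdint.h>

    static void dwconv_tap_8ch(const int8_t *vi, const int8_t *vk, int32_t acc[8]) {
      /* Sign-extend 8 input and 8 kernel bytes to 16-bit lanes. */
      const __m128i vxi = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i *)vi));
      const __m128i vxk = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i *)vk));

      const __m128i vplo = _mm_mullo_epi16(vxi, vxk);  /* low 16 bits of products */
      const __m128i vphi = _mm_mulhi_epi16(vxi, vxk);  /* high 16 bits (signed) */

      /* Interleave lo/hi halves into 32-bit products and accumulate. */
      __m128i vacc_lo = _mm_loadu_si128((const __m128i *)&acc[0]);
      __m128i vacc_hi = _mm_loadu_si128((const __m128i *)&acc[4]);
      vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vplo, vphi));  /* ch 0..3 */
      vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vplo, vphi));  /* ch 4..7 */
      _mm_storeu_si128((__m128i *)&acc[0], vacc_lo);
      _mm_storeu_si128((__m128i *)&acc[4], vacc_hi);
    }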
/external/libgav1/libgav1/src/dsp/x86/

intrapred_smooth_sse4.cc:
     62  const __m128i weighted_left_y = _mm_mullo_epi16(left_y, weights);               in WriteSmoothHorizontalSum4()
     76  const __m128i weighted_px = _mm_mullo_epi16(pixels, weights);                   in SmoothDirectionalSum8()
    101  const __m128i weighted_px1 = _mm_mullo_epi16(pixels1, weights1);                in WriteSmoothDirectionalSum16()
    102  const __m128i weighted_px2 = _mm_mullo_epi16(pixels2, weights2);                in WriteSmoothDirectionalSum16()
    229  const __m128i scaled_top_right = _mm_mullo_epi16(inverted_weights, top_right);  in Smooth4x4_SSE4_1()
    231  _mm_mullo_epi16(inverted_weights, bottom_left);                                 in Smooth4x4_SSE4_1()
    493  _mm_mullo_epi16(scale_m_weights_y, bottom_left);                                in SmoothWxH()
    515  _mm_mullo_epi16(inverted_weights_x, top_right);                                 in SmoothWxH()
    544  const __m128i scaled_top_right = _mm_mullo_epi16(inverted_weights, top_right);  in SmoothHorizontal4x4_SSE4_1()
    564  const __m128i scaled_top_right = _mm_mullo_epi16(inverted_weights, top_right);  in SmoothHorizontal4x8_SSE4_1()
    [all …]
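The smooth intra predictors compute weighted blends of the form w * near_pixel + (scale - w) * far_pixel in 16-bit lanes; the inverted_weights in the listing are scale - w. A minimal sketch of a SMOOTH_H-style row, assuming AV1's weight scale of 256 (an assumption worth checking against the spec or source) and illustrative names, not libgav1's interface:

    #include <emmintrin.h>  /* SSE2 is enough for this sketch */
    #include <stdint.h>

    /* pred[x] = (w[x]*left + (256 - w[x])*top_right + 128) >> 8 */
    static void smooth_h_row(const uint16_t weights[8], uint16_t left,
                             uint16_t top_right, uint8_t out[8]) {
      const __m128i w = _mm_loadu_si128((const __m128i *)weights);
      const __m128i scale = _mm_set1_epi16(256);
      const __m128i inverted_weights = _mm_sub_epi16(scale, w);
      const __m128i left_y = _mm_set1_epi16((int16_t)left);
      const __m128i tr = _mm_set1_epi16((int16_t)top_right);

      const __m128i weighted_left = _mm_mullo_epi16(w, left_y);               /* w * left */
      const __m128i scaled_top_right = _mm_mullo_epi16(inverted_weights, tr); /* (256-w) * tr */
      __m128i pred = _mm_add_epi16(weighted_left, scaled_top_right);
      pred = _mm_add_epi16(pred, _mm_set1_epi16(128));  /* rounding: scale / 2 */
      pred = _mm_srli_epi16(pred, 8);                   /* >> log2(scale) */
      _mm_storel_epi64((__m128i *)out, _mm_packus_epi16(pred, pred));
    }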
/external/XNNPACK/src/qu8-vadd/

minmax-sse2.c (all matches in xnn_qu8_vadd_minmax_ukernel__sse2()):
     43  const __m128i va_product_lo = _mm_mullo_epi16(vxa, va_multiplier_lo);
     45  _mm_add_epi16(_mm_mulhi_epu16(vxa, va_multiplier_lo), _mm_mullo_epi16(vxa, va_multiplier_hi));
     47  const __m128i vb_product_lo = _mm_mullo_epi16(vxb, vb_multiplier_lo);
     49  _mm_add_epi16(_mm_mulhi_epu16(vxb, vb_multiplier_lo), _mm_mullo_epi16(vxb, vb_multiplier_hi));
     85  const __m128i va_product_lo = _mm_mullo_epi16(vxa, va_multiplier_lo);
     87  _mm_add_epi16(_mm_mulhi_epu16(vxa, va_multiplier_lo), _mm_mullo_epi16(vxa, va_multiplier_hi));
     89  const __m128i vb_product_lo = _mm_mullo_epi16(vxb, vb_multiplier_lo);
     91  _mm_add_epi16(_mm_mulhi_epu16(vxb, vb_multiplier_lo), _mm_mullo_epi16(vxb, vb_multiplier_hi));
/external/XNNPACK/src/qs8-vaddc/gen/

minmax-sse2-mul16-ld64-x32.c (all matches in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()):
     49  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     51  const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo);
     53  const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo);
     55  const __m128i vxprodOPQRSTUVlo = _mm_mullo_epi16(vxOPQRSTUV, vx_multiplier_lo);
     57  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     58  … vxprod89ABCDEFhi = _mm_add_epi16(vxprod89ABCDEFhi, _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_hi));
     59  … vxprodGHIJKLMNhi = _mm_add_epi16(vxprodGHIJKLMNhi, _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_hi));
     60  … vxprodOPQRSTUVhi = _mm_add_epi16(vxprodOPQRSTUVhi, _mm_mullo_epi16(vxOPQRSTUV, vx_multiplier_hi));
    124  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
    126  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));

minmax-sse41-mul16-ld64-x32.c (all matches in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()):
     45  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     47  const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo);
     49  const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo);
     51  const __m128i vxprodOPQRSTUVlo = _mm_mullo_epi16(vxOPQRSTUV, vx_multiplier_lo);
     53  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     54  … vxprod89ABCDEFhi = _mm_add_epi16(vxprod89ABCDEFhi, _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_hi));
     55  … vxprodGHIJKLMNhi = _mm_add_epi16(vxprodGHIJKLMNhi, _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_hi));
     56  … vxprodOPQRSTUVhi = _mm_add_epi16(vxprodOPQRSTUVhi, _mm_mullo_epi16(vxOPQRSTUV, vx_multiplier_hi));
    119  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
    121  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));

minmax-sse41-mul16-ld64-x24.c (all matches in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()):
     44  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     46  const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo);
     48  const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo);
     50  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     51  … vxprod89ABCDEFhi = _mm_add_epi16(vxprod89ABCDEFhi, _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_hi));
     52  … vxprodGHIJKLMNhi = _mm_add_epi16(vxprodGHIJKLMNhi, _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_hi));
    105  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
    107  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));

minmax-sse2-mul16-ld64-x24.c (all matches in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()):
     47  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
     49  const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo);
     51  const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo);
     53  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
     54  … vxprod89ABCDEFhi = _mm_add_epi16(vxprod89ABCDEFhi, _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_hi));
     55  … vxprodGHIJKLMNhi = _mm_add_epi16(vxprodGHIJKLMNhi, _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_hi));
    109  const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo);
    111  … vxprod01234567hi = _mm_add_epi16(vxprod01234567hi, _mm_mullo_epi16(vx01234567, vx_multiplier_hi));
/external/libaom/libaom/aom_dsp/x86/

blend_sse4.h:
     33  const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, *v_m0_w);  in blend_4()
     34  const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, *v_m1_w);  in blend_4()
     48  const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, *v_m0_w);  in blend_8()
     49  const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, *v_m1_w);  in blend_8()
    111  const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, v_m0_w);   in blend_4_b10()
    112  const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, v_m1_w);   in blend_4_b10()
    126  const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, v_m0_w);   in blend_8_b10()
    127  const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, v_m1_w);   in blend_8_b10()
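blend_4/blend_8 and their 10-bit variants compute a masked average of two sources: each source is multiplied by its mask weight with _mm_mullo_epi16 and the pair is summed, rounded, and shifted. A minimal sketch of the 8-bit case, assuming aom's A64 convention (mask weights in [0, 64], 6-bit rounding shift); names are illustrative, not libaom's interface:

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>

    /* dst[i] = (m[i]*s0[i] + (64 - m[i])*s1[i] + 32) >> 6 */
    static void blend_row_8px(const uint8_t *s0, const uint8_t *s1,
                              const uint8_t *mask, uint8_t *dst) {
      const __m128i zero = _mm_setzero_si128();
      const __m128i v_maxval = _mm_set1_epi16(64);

      const __m128i v_s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)s0), zero);
      const __m128i v_s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)s1), zero);
      const __m128i v_m0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)mask), zero);
      const __m128i v_m1 = _mm_sub_epi16(v_maxval, v_m0);  /* complementary weight */

      const __m128i v_p0 = _mm_mullo_epi16(v_s0, v_m0);    /* s0 * m */
      const __m128i v_p1 = _mm_mullo_epi16(v_s1, v_m1);    /* s1 * (64 - m) */
      __m128i v_sum = _mm_add_epi16(v_p0, v_p1);
      v_sum = _mm_add_epi16(v_sum, _mm_set1_epi16(32));    /* rounding */
      v_sum = _mm_srli_epi16(v_sum, 6);
      _mm_storel_epi64((__m128i *)dst, _mm_packus_epi16(v_sum, v_sum));
    }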
/external/XNNPACK/src/qu8-dwconv/

up8x9-minmax-sse2.c (all matches in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()):
     79  const __m128i vprod0_odd = _mm_mullo_epi16(vxi0, vxk0);
     88  const __m128i vprod1_odd = _mm_mullo_epi16(vxi1, vxk1);
     97  const __m128i vprod2_odd = _mm_mullo_epi16(vxi2, vxk2);
    106  const __m128i vprod3_odd = _mm_mullo_epi16(vxi3, vxk3);
    115  const __m128i vprod4_odd = _mm_mullo_epi16(vxi4, vxk4);
    124  const __m128i vprod5_odd = _mm_mullo_epi16(vxi5, vxk5);
    133  const __m128i vprod6_odd = _mm_mullo_epi16(vxi6, vxk6);
    142  const __m128i vprod7_odd = _mm_mullo_epi16(vxi7, vxk7);
    151  const __m128i vprod8_odd = _mm_mullo_epi16(vxi8, vxk8);
    231  const __m128i vprod0_odd = _mm_mullo_epi16(vxi0, vxk0);
    [all …]