Home
last modified time | relevance | path

Searched refs:vy_multiplier_lo (Results 1 – 9 of 9) sorted by relevance

/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-sse41-mul16-ld64-x32.c27 const __m128i vy_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() local
50 __m128i vyprod01234567hi = _mm_mulhi_epu16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
52 const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
54 __m128i vyprod89ABCDEFhi = _mm_mulhi_epu16(vy89ABCDEF, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
56 const __m128i vyprod89ABCDEFlo = _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
58 __m128i vyprodGHIJKLMNhi = _mm_mulhi_epu16(vyGHIJKLMN, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
60 const __m128i vyprodGHIJKLMNlo = _mm_mullo_epi16(vyGHIJKLMN, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
62 __m128i vyprodOPQRSTUVhi = _mm_mulhi_epu16(vyOPQRSTUV, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
64 const __m128i vyprodOPQRSTUVlo = _mm_mullo_epi16(vyOPQRSTUV, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
76 …= _mm_sub_epi16(vyprod01234567hi, _mm_and_si128(_mm_srai_epi16(vy01234567, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
[all …]
Dminmax-sse2-mul16-ld64-x32.c27 const __m128i vy_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() local
58 __m128i vyprod01234567hi = _mm_mulhi_epu16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
60 const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
62 __m128i vyprod89ABCDEFhi = _mm_mulhi_epu16(vy89ABCDEF, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
64 const __m128i vyprod89ABCDEFlo = _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
66 __m128i vyprodGHIJKLMNhi = _mm_mulhi_epu16(vyGHIJKLMN, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
68 const __m128i vyprodGHIJKLMNlo = _mm_mullo_epi16(vyGHIJKLMN, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
70 __m128i vyprodOPQRSTUVhi = _mm_mulhi_epu16(vyOPQRSTUV, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
72 const __m128i vyprodOPQRSTUVlo = _mm_mullo_epi16(vyOPQRSTUV, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
84 …= _mm_sub_epi16(vyprod01234567hi, _mm_and_si128(_mm_srai_epi16(vy01234567, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
[all …]
Dminmax-sse41-mul16-ld64-x24.c27 const __m128i vy_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() local
48 __m128i vyprod01234567hi = _mm_mulhi_epu16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
50 const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
52 __m128i vyprod89ABCDEFhi = _mm_mulhi_epu16(vy89ABCDEF, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
54 const __m128i vyprod89ABCDEFlo = _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
56 __m128i vyprodGHIJKLMNhi = _mm_mulhi_epu16(vyGHIJKLMN, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
58 const __m128i vyprodGHIJKLMNlo = _mm_mullo_epi16(vyGHIJKLMN, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
68 …= _mm_sub_epi16(vyprod01234567hi, _mm_and_si128(_mm_srai_epi16(vy01234567, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
70 …= _mm_sub_epi16(vyprod89ABCDEFhi, _mm_and_si128(_mm_srai_epi16(vy89ABCDEF, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
72 …= _mm_sub_epi16(vyprodGHIJKLMNhi, _mm_and_si128(_mm_srai_epi16(vyGHIJKLMN, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
[all …]
Dminmax-sse2-mul16-ld64-x24.c27 const __m128i vy_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() local
54 __m128i vyprod01234567hi = _mm_mulhi_epu16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
56 const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
58 __m128i vyprod89ABCDEFhi = _mm_mulhi_epu16(vy89ABCDEF, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
60 const __m128i vyprod89ABCDEFlo = _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
62 __m128i vyprodGHIJKLMNhi = _mm_mulhi_epu16(vyGHIJKLMN, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
64 const __m128i vyprodGHIJKLMNlo = _mm_mullo_epi16(vyGHIJKLMN, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
74 …= _mm_sub_epi16(vyprod01234567hi, _mm_and_si128(_mm_srai_epi16(vy01234567, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
76 …= _mm_sub_epi16(vyprod89ABCDEFhi, _mm_and_si128(_mm_srai_epi16(vy89ABCDEF, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
78 …= _mm_sub_epi16(vyprodGHIJKLMNhi, _mm_and_si128(_mm_srai_epi16(vyGHIJKLMN, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
[all …]
Dminmax-sse2-mul16-ld64-x16.c27 const __m128i vy_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16() local
50 __m128i vyprod01234567hi = _mm_mulhi_epu16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
52 const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
54 __m128i vyprod89ABCDEFhi = _mm_mulhi_epu16(vy89ABCDEF, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
56 const __m128i vyprod89ABCDEFlo = _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
64 …= _mm_sub_epi16(vyprod01234567hi, _mm_and_si128(_mm_srai_epi16(vy01234567, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
66 …= _mm_sub_epi16(vyprod89ABCDEFhi, _mm_and_si128(_mm_srai_epi16(vy89ABCDEF, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
113 __m128i vyprod01234567hi = _mm_mulhi_epu16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
115 const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
121 …= _mm_sub_epi16(vyprod01234567hi, _mm_and_si128(_mm_srai_epi16(vy01234567, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
Dminmax-sse41-mul16-ld64-x16.c27 const __m128i vy_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16() local
46 __m128i vyprod01234567hi = _mm_mulhi_epu16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
48 const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
50 __m128i vyprod89ABCDEFhi = _mm_mulhi_epu16(vy89ABCDEF, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
52 const __m128i vyprod89ABCDEFlo = _mm_mullo_epi16(vy89ABCDEF, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
60 …= _mm_sub_epi16(vyprod01234567hi, _mm_and_si128(_mm_srai_epi16(vy01234567, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
62 …= _mm_sub_epi16(vyprod89ABCDEFhi, _mm_and_si128(_mm_srai_epi16(vy89ABCDEF, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
107 __m128i vyprod01234567hi = _mm_mulhi_epu16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
109 const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
115 …= _mm_sub_epi16(vyprod01234567hi, _mm_and_si128(_mm_srai_epi16(vy01234567, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
Dminmax-sse41-mul16-ld64-x8.c27 const __m128i vy_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() local
44 __m128i vyprod01234567hi = _mm_mulhi_epu16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
46 const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
52 …= _mm_sub_epi16(vyprod01234567hi, _mm_and_si128(_mm_srai_epi16(vy01234567, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
84 __m128i vyprod01234567hi = _mm_mulhi_epu16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
86 const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
92 …= _mm_sub_epi16(vyprod01234567hi, _mm_and_si128(_mm_srai_epi16(vy01234567, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
Dminmax-sse2-mul16-ld64-x8.c27 const __m128i vy_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() local
46 __m128i vyprod01234567hi = _mm_mulhi_epu16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
48 const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
54 …= _mm_sub_epi16(vyprod01234567hi, _mm_and_si128(_mm_srai_epi16(vy01234567, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
88 __m128i vyprod01234567hi = _mm_mulhi_epu16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
90 const __m128i vyprod01234567lo = _mm_mullo_epi16(vy01234567, vy_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
96 …= _mm_sub_epi16(vyprod01234567hi, _mm_and_si128(_mm_srai_epi16(vy01234567, 15), vy_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
/external/XNNPACK/src/qs8-vadd/
Dsse-mul16-ld64.c.in28 const __m128i vy_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_lo); variable
60 __m128i vyprod${ABC[N:N+8]}hi = _mm_mulhi_epu16(vy${ABC[N:N+8]}, vy_multiplier_lo);
62 const __m128i vyprod${ABC[N:N+8]}lo = _mm_mullo_epi16(vy${ABC[N:N+8]}, vy_multiplier_lo);
70 …epi16(vyprod${ABC[N:N+8]}hi, _mm_and_si128(_mm_srai_epi16(vy${ABC[N:N+8]}, 15), vy_multiplier_lo));
129 __m128i vyprod${ABC[0:8]}hi = _mm_mulhi_epu16(vy${ABC[0:8]}, vy_multiplier_lo);
131 const __m128i vyprod${ABC[0:8]}lo = _mm_mullo_epi16(vy${ABC[0:8]}, vy_multiplier_lo);
137 …sub_epi16(vyprod${ABC[0:8]}hi, _mm_and_si128(_mm_srai_epi16(vy${ABC[0:8]}, 15), vy_multiplier_lo));