Home
last modified time | relevance | path

Searched refs:vx_multiplier_lo (Results 1 – 18 of 18) sorted by relevance

/external/XNNPACK/src/qs8-vaddc/gen/
minmax-sse2-mul16-ld64-x32.c:24 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() local
48 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
49 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
50 __m128i vxprod89ABCDEFhi = _mm_mulhi_epu16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
51 const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
52 __m128i vxprodGHIJKLMNhi = _mm_mulhi_epu16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
53 const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
54 __m128i vxprodOPQRSTUVhi = _mm_mulhi_epu16(vxOPQRSTUV, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
55 const __m128i vxprodOPQRSTUVlo = _mm_mullo_epi16(vxOPQRSTUV, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
62 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
[all …]
minmax-sse41-mul16-ld64-x32.c:24 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() local
44 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
45 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
46 __m128i vxprod89ABCDEFhi = _mm_mulhi_epu16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
47 const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
48 __m128i vxprodGHIJKLMNhi = _mm_mulhi_epu16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
49 const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
50 __m128i vxprodOPQRSTUVhi = _mm_mulhi_epu16(vxOPQRSTUV, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
51 const __m128i vxprodOPQRSTUVlo = _mm_mullo_epi16(vxOPQRSTUV, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
58 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
[all …]
minmax-sse41-mul16-ld64-x24.c:24 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24() local
43 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
44 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
45 __m128i vxprod89ABCDEFhi = _mm_mulhi_epu16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
46 const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
47 __m128i vxprodGHIJKLMNhi = _mm_mulhi_epu16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
48 const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
54 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
55 …= _mm_sub_epi16(vxprod89ABCDEFhi, _mm_and_si128(_mm_srai_epi16(vx89ABCDEF, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
56 …= _mm_sub_epi16(vxprodGHIJKLMNhi, _mm_and_si128(_mm_srai_epi16(vxGHIJKLMN, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
[all …]
minmax-sse2-mul16-ld64-x24.c:24 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() local
46 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
47 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
48 __m128i vxprod89ABCDEFhi = _mm_mulhi_epu16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
49 const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
50 __m128i vxprodGHIJKLMNhi = _mm_mulhi_epu16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
51 const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
57 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
58 …= _mm_sub_epi16(vxprod89ABCDEFhi, _mm_and_si128(_mm_srai_epi16(vx89ABCDEF, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
59 …= _mm_sub_epi16(vxprodGHIJKLMNhi, _mm_and_si128(_mm_srai_epi16(vxGHIJKLMN, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
[all …]
minmax-sse41-mul16-ld64-x16.c:24 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16() local
42 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
43 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
44 __m128i vxprod89ABCDEFhi = _mm_mulhi_epu16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
45 const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
50 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
51 …= _mm_sub_epi16(vxprod89ABCDEFhi, _mm_and_si128(_mm_srai_epi16(vx89ABCDEF, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
88 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
89 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
93 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
minmax-sse2-mul16-ld64-x16.c:24 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() local
44 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
45 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
46 __m128i vxprod89ABCDEFhi = _mm_mulhi_epu16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
47 const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
52 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
53 …= _mm_sub_epi16(vxprod89ABCDEFhi, _mm_and_si128(_mm_srai_epi16(vx89ABCDEF, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
91 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
92 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
96 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
minmax-sse41-mul16-ld64-x8.c:24 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8() local
41 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
42 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
46 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
73 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
74 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
78 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
minmax-sse2-mul16-ld64-x8.c:24 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8() local
42 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
43 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
47 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
75 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
76 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
80 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
/external/XNNPACK/src/qs8-vadd/gen/
minmax-sse41-mul16-ld64-x32.c:25 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() local
49 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
51 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
53 __m128i vxprod89ABCDEFhi = _mm_mulhi_epu16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
55 const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
57 __m128i vxprodGHIJKLMNhi = _mm_mulhi_epu16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
59 const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
61 __m128i vxprodOPQRSTUVhi = _mm_mulhi_epu16(vxOPQRSTUV, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
63 const __m128i vxprodOPQRSTUVlo = _mm_mullo_epi16(vxOPQRSTUV, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
75 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
[all …]
minmax-sse2-mul16-ld64-x32.c:25 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() local
57 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
59 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
61 __m128i vxprod89ABCDEFhi = _mm_mulhi_epu16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
63 const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
65 __m128i vxprodGHIJKLMNhi = _mm_mulhi_epu16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
67 const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
69 __m128i vxprodOPQRSTUVhi = _mm_mulhi_epu16(vxOPQRSTUV, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
71 const __m128i vxprodOPQRSTUVlo = _mm_mullo_epi16(vxOPQRSTUV, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
83 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
[all …]
minmax-sse41-mul16-ld64-x24.c:25 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() local
47 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
49 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
51 __m128i vxprod89ABCDEFhi = _mm_mulhi_epu16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
53 const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
55 __m128i vxprodGHIJKLMNhi = _mm_mulhi_epu16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
57 const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
67 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
69 …= _mm_sub_epi16(vxprod89ABCDEFhi, _mm_and_si128(_mm_srai_epi16(vx89ABCDEF, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
71 …= _mm_sub_epi16(vxprodGHIJKLMNhi, _mm_and_si128(_mm_srai_epi16(vxGHIJKLMN, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
[all …]
minmax-sse2-mul16-ld64-x24.c:25 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() local
53 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
55 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
57 __m128i vxprod89ABCDEFhi = _mm_mulhi_epu16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
59 const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
61 __m128i vxprodGHIJKLMNhi = _mm_mulhi_epu16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
63 const __m128i vxprodGHIJKLMNlo = _mm_mullo_epi16(vxGHIJKLMN, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
73 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
75 …= _mm_sub_epi16(vxprod89ABCDEFhi, _mm_and_si128(_mm_srai_epi16(vx89ABCDEF, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
77 …= _mm_sub_epi16(vxprodGHIJKLMNhi, _mm_and_si128(_mm_srai_epi16(vxGHIJKLMN, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
[all …]
minmax-sse2-mul16-ld64-x16.c:25 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16() local
49 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
51 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
53 __m128i vxprod89ABCDEFhi = _mm_mulhi_epu16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
55 const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
63 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
65 …= _mm_sub_epi16(vxprod89ABCDEFhi, _mm_and_si128(_mm_srai_epi16(vx89ABCDEF, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
112 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
114 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
120 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
minmax-sse41-mul16-ld64-x16.c:25 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16() local
45 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
47 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
49 __m128i vxprod89ABCDEFhi = _mm_mulhi_epu16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
51 const __m128i vxprod89ABCDEFlo = _mm_mullo_epi16(vx89ABCDEF, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
59 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
61 …= _mm_sub_epi16(vxprod89ABCDEFhi, _mm_and_si128(_mm_srai_epi16(vx89ABCDEF, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
106 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
108 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
114 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16()
minmax-sse41-mul16-ld64-x8.c:25 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() local
43 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
45 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
51 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
83 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
85 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
91 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
minmax-sse2-mul16-ld64-x8.c:25 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() local
45 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
47 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
53 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
87 __m128i vxprod01234567hi = _mm_mulhi_epu16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
89 const __m128i vxprod01234567lo = _mm_mullo_epi16(vx01234567, vx_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
95 …= _mm_sub_epi16(vxprod01234567hi, _mm_and_si128(_mm_srai_epi16(vx01234567, 15), vx_multiplier_lo)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
/external/XNNPACK/src/qs8-vaddc/
sse-mul16-ld64.c.in:25 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); variable
53 __m128i vxprod${ABC[N:N+8]}hi = _mm_mulhi_epu16(vx${ABC[N:N+8]}, vx_multiplier_lo);
54 const __m128i vxprod${ABC[N:N+8]}lo = _mm_mullo_epi16(vx${ABC[N:N+8]}, vx_multiplier_lo);
60 …epi16(vxprod${ABC[N:N+8]}hi, _mm_and_si128(_mm_srai_epi16(vx${ABC[N:N+8]}, 15), vx_multiplier_lo));
110 __m128i vxprod${ABC[0:8]}hi = _mm_mulhi_epu16(vx${ABC[0:8]}, vx_multiplier_lo);
111 const __m128i vxprod${ABC[0:8]}lo = _mm_mullo_epi16(vx${ABC[0:8]}, vx_multiplier_lo);
115 …sub_epi16(vxprod${ABC[0:8]}hi, _mm_and_si128(_mm_srai_epi16(vx${ABC[0:8]}, 15), vx_multiplier_lo));
/external/XNNPACK/src/qs8-vadd/
sse-mul16-ld64.c.in:26 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); variable
59 __m128i vxprod${ABC[N:N+8]}hi = _mm_mulhi_epu16(vx${ABC[N:N+8]}, vx_multiplier_lo);
61 const __m128i vxprod${ABC[N:N+8]}lo = _mm_mullo_epi16(vx${ABC[N:N+8]}, vx_multiplier_lo);
69 …epi16(vxprod${ABC[N:N+8]}hi, _mm_and_si128(_mm_srai_epi16(vx${ABC[N:N+8]}, 15), vx_multiplier_lo));
128 __m128i vxprod${ABC[0:8]}hi = _mm_mulhi_epu16(vx${ABC[0:8]}, vx_multiplier_lo);
130 const __m128i vxprod${ABC[0:8]}lo = _mm_mullo_epi16(vx${ABC[0:8]}, vx_multiplier_lo);
136 …sub_epi16(vxprod${ABC[0:8]}hi, _mm_and_si128(_mm_srai_epi16(vx${ABC[0:8]}, 15), vx_multiplier_lo));