Home
last modified time | relevance | path

Searched refs:_mm_cmpgt_epi32 (Results 1 – 25 of 242) sorted by relevance

12345678910

/external/XNNPACK/src/qs8-vaddc/gen/
Dminmax-sse41-mul32-ld32-x32.c57 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
58 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
59 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
60 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
61 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
62 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
63 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
64 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
66 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
67 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
[all …]
Dminmax-xop-mul32-ld32-x32.c62 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
63 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
64 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
65 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
66 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
67 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
68 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
69 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
71 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
72 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
[all …]
Dminmax-xop-mul32-ld32-x24.c58 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
59 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
60 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
61 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
62 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
63 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
65 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
66 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
67 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
68 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
[all …]
Dminmax-sse2-mul16-ld64-x32.c76 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
77 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
78 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
79 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
80 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
81 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
82 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
83 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
85 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
86 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
[all …]
Dminmax-sse41-mul16-ld64-x32.c72 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
73 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
74 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
75 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
76 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
77 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
78 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
79 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
81 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
82 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
[all …]
Dminmax-sse41-mul32-ld32-x24.c53 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
54 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
55 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
56 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
57 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
58 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
60 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
61 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
62 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
63 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
[all …]
Dminmax-sse41-mul16-ld64-x24.c65 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
66 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
67 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
68 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
69 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
70 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
72 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
73 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
74 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
75 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24()
[all …]
Dminmax-sse41-mul32-ld32-x16.c49 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
50 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
51 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
52 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
54 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
55 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
56 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
57 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
82 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
83 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
[all …]
Dminmax-xop-mul32-ld32-x16.c54 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
55 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
56 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
57 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
59 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
60 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
61 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
62 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
87 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
88 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
[all …]
Dminmax-sse2-mul16-ld64-x24.c68 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
69 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
70 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
71 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
72 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
73 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
75 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
76 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
77 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
78 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
[all …]
Dminmax-sse41-mul16-ld64-x16.c58 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
59 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
60 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
61 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
63 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
64 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
65 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
66 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
98 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
99 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16()
[all …]
Dminmax-sse2-mul16-ld64-x16.c60 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
61 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
62 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
63 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
65 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
66 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
67 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
68 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
101 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
102 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
[all …]
Dminmax-sse41-mul32-ld32-x8.c45 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
46 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
48 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
49 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
70 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
71 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
73 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
74 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-sse41-mul32-ld32-x32.c73 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
74 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
75 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
76 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
77 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
78 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
79 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
80 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
82 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
83 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
[all …]
Dminmax-xop-mul32-ld32-x32.c78 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
79 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
80 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
81 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
82 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
83 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
84 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
85 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
87 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
88 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
[all …]
Dminmax-sse41-mul32-ld32-x24.c65 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
66 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
67 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
68 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
69 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
70 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
72 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
73 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
74 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
75 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
[all …]
Dminmax-xop-mul32-ld32-x24.c70 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
71 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
72 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
73 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
74 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
75 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
77 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
78 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
79 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
80 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
[all …]
Dminmax-sse41-mul16-ld64-x32.c102 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
103 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
104 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
105 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
106 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
107 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
108 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
109 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
111 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
112 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
[all …]
Dminmax-xop-mul32-ld32-x16.c62 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
63 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
64 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
65 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
67 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
68 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
69 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
70 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
101 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
102 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
[all …]
Dminmax-sse41-mul32-ld32-x16.c57 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
58 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
59 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
60 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
62 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
63 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
64 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
65 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
96 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
97 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
[all …]
Dminmax-sse2-mul16-ld64-x32.c110 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
111 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
112 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
113 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
114 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
115 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
116 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
117 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
119 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
120 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
[all …]
Dminmax-sse41-mul16-ld64-x24.c88 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
89 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
90 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
91 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
92 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
93 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
95 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
96 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
97 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
98 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
[all …]
Dminmax-sse2-mul16-ld64-x24.c94 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
95 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
96 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
97 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
98 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
99 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
101 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
102 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
103 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
104 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
[all …]
/external/XNNPACK/src/qu8-requantization/
Dq31-ssse3.c67 const __m128i x_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), x); in xnn_qu8_requantize_q31__ssse3()
68 const __m128i y_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), y); in xnn_qu8_requantize_q31__ssse3()
69 const __m128i z_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), z); in xnn_qu8_requantize_q31__ssse3()
70 const __m128i w_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), w); in xnn_qu8_requantize_q31__ssse3()
141 …_mm_add_epi32(_mm_and_si128(x_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), x… in xnn_qu8_requantize_q31__ssse3()
143 …_mm_add_epi32(_mm_and_si128(y_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), y… in xnn_qu8_requantize_q31__ssse3()
145 …_mm_add_epi32(_mm_and_si128(z_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), z… in xnn_qu8_requantize_q31__ssse3()
147 …_mm_add_epi32(_mm_and_si128(w_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), w… in xnn_qu8_requantize_q31__ssse3()
150 … _mm_sub_epi32(_mm_sra_epi32(x_q31product, vshift), _mm_cmpgt_epi32(x_remainder, vthreshold)); in xnn_qu8_requantize_q31__ssse3()
152 … _mm_sub_epi32(_mm_sra_epi32(y_q31product, vshift), _mm_cmpgt_epi32(y_remainder, vthreshold)); in xnn_qu8_requantize_q31__ssse3()
[all …]
/external/XNNPACK/src/qs8-requantization/
Dq31-ssse3.c67 const __m128i x_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), x); in xnn_qs8_requantize_q31__ssse3()
68 const __m128i y_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), y); in xnn_qs8_requantize_q31__ssse3()
69 const __m128i z_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), z); in xnn_qs8_requantize_q31__ssse3()
70 const __m128i w_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), w); in xnn_qs8_requantize_q31__ssse3()
141 …_mm_add_epi32(_mm_and_si128(x_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), x… in xnn_qs8_requantize_q31__ssse3()
143 …_mm_add_epi32(_mm_and_si128(y_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), y… in xnn_qs8_requantize_q31__ssse3()
145 …_mm_add_epi32(_mm_and_si128(z_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), z… in xnn_qs8_requantize_q31__ssse3()
147 …_mm_add_epi32(_mm_and_si128(w_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), w… in xnn_qs8_requantize_q31__ssse3()
150 … _mm_sub_epi32(_mm_sra_epi32(x_q31product, vshift), _mm_cmpgt_epi32(x_remainder, vthreshold)); in xnn_qs8_requantize_q31__ssse3()
152 … _mm_sub_epi32(_mm_sra_epi32(y_q31product, vshift), _mm_cmpgt_epi32(y_remainder, vthreshold)); in xnn_qs8_requantize_q31__ssse3()
[all …]

12345678910