/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-sse41-mul32-ld32-x32.c | 57 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 58 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 59 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 60 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 61 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 62 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 63 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 64 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 66 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 67 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() [all …]
|
D | minmax-xop-mul32-ld32-x32.c | 62 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 63 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 64 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 65 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 66 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 67 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 68 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 69 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 71 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 72 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() [all …]
|
D | minmax-xop-mul32-ld32-x24.c | 58 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 59 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 60 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 61 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 62 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 63 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 65 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 66 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 67 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 68 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() [all …]
|
D | minmax-sse2-mul16-ld64-x32.c | 76 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() 77 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() 78 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() 79 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() 80 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() 81 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() 82 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() 83 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() 85 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() 86 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() [all …]
|
D | minmax-sse41-mul16-ld64-x32.c | 72 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() 73 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() 74 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() 75 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() 76 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() 77 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() 78 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() 79 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() 81 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() 82 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() [all …]
|
D | minmax-sse41-mul32-ld32-x24.c | 53 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 54 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 55 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 56 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 57 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 58 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 60 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 61 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 62 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 63 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() [all …]
|
D | minmax-sse41-mul16-ld64-x24.c | 65 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24() 66 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24() 67 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24() 68 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24() 69 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24() 70 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24() 72 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24() 73 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24() 74 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24() 75 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24() [all …]
|
D | minmax-sse41-mul32-ld32-x16.c | 49 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 50 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 51 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 52 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 54 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 55 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 56 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 57 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 82 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 83 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() [all …]
|
D | minmax-xop-mul32-ld32-x16.c | 54 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 55 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 56 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 57 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 59 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 60 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 61 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 62 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 87 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 88 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() [all …]
|
D | minmax-sse2-mul16-ld64-x24.c | 68 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() 69 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() 70 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() 71 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() 72 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() 73 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() 75 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() 76 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() 77 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() 78 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() [all …]
|
D | minmax-sse41-mul16-ld64-x16.c | 58 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16() 59 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16() 60 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16() 61 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16() 63 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16() 64 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16() 65 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16() 66 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16() 98 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16() 99 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16() [all …]
|
D | minmax-sse2-mul16-ld64-x16.c | 60 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() 61 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() 62 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() 63 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() 65 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() 66 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() 67 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() 68 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() 101 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() 102 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() [all …]
|
D | minmax-sse41-mul32-ld32-x8.c | 45 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 46 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 48 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 49 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 70 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 71 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 73 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 74 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-sse41-mul32-ld32-x32.c | 73 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 74 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 75 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 76 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 77 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 78 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 79 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 80 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 82 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 83 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() [all …]
|
D | minmax-xop-mul32-ld32-x32.c | 78 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 79 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 80 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 81 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 82 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 83 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 84 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 85 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 87 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 88 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() [all …]
|
D | minmax-sse41-mul32-ld32-x24.c | 65 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 66 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 67 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 68 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 69 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 70 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 72 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 73 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 74 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 75 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() [all …]
|
D | minmax-xop-mul32-ld32-x24.c | 70 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 71 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 72 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 73 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 74 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 75 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 77 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 78 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 79 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 80 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() [all …]
|
D | minmax-sse41-mul16-ld64-x32.c | 102 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 103 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 104 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 105 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 106 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 107 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 108 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 109 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 111 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 112 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() [all …]
|
D | minmax-xop-mul32-ld32-x16.c | 62 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 63 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 64 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 65 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 67 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 68 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 69 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 70 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 101 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 102 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() [all …]
|
D | minmax-sse41-mul32-ld32-x16.c | 57 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 58 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 59 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 60 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 62 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 63 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 64 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 65 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 96 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 97 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() [all …]
|
D | minmax-sse2-mul16-ld64-x32.c | 110 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 111 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 112 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 113 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 114 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 115 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 116 …i vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 117 …i vremSTUV = _mm_add_epi32(_mm_and_si128(vaccSTUV, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 119 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 120 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() [all …]
|
D | minmax-sse41-mul16-ld64-x24.c | 88 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 89 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 90 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 91 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 92 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 93 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 95 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 96 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 97 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 98 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() [all …]
|
D | minmax-sse2-mul16-ld64-x24.c | 94 …i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() 95 …i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() 96 …i vrem89AB = _mm_add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() 97 …i vremCDEF = _mm_add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() 98 …i vremGHIJ = _mm_add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() 99 …i vremKLMN = _mm_add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_s… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() 101 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() 102 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() 103 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() 104 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() [all …]
|
/external/XNNPACK/src/qu8-requantization/ |
D | q31-ssse3.c | 67 const __m128i x_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), x); in xnn_qu8_requantize_q31__ssse3() 68 const __m128i y_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), y); in xnn_qu8_requantize_q31__ssse3() 69 const __m128i z_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), z); in xnn_qu8_requantize_q31__ssse3() 70 const __m128i w_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), w); in xnn_qu8_requantize_q31__ssse3() 141 …_mm_add_epi32(_mm_and_si128(x_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), x… in xnn_qu8_requantize_q31__ssse3() 143 …_mm_add_epi32(_mm_and_si128(y_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), y… in xnn_qu8_requantize_q31__ssse3() 145 …_mm_add_epi32(_mm_and_si128(z_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), z… in xnn_qu8_requantize_q31__ssse3() 147 …_mm_add_epi32(_mm_and_si128(w_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), w… in xnn_qu8_requantize_q31__ssse3() 150 … _mm_sub_epi32(_mm_sra_epi32(x_q31product, vshift), _mm_cmpgt_epi32(x_remainder, vthreshold)); in xnn_qu8_requantize_q31__ssse3() 152 … _mm_sub_epi32(_mm_sra_epi32(y_q31product, vshift), _mm_cmpgt_epi32(y_remainder, vthreshold)); in xnn_qu8_requantize_q31__ssse3() [all …]
|
/external/XNNPACK/src/qs8-requantization/ |
D | q31-ssse3.c | 67 const __m128i x_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), x); in xnn_qs8_requantize_q31__ssse3() 68 const __m128i y_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), y); in xnn_qs8_requantize_q31__ssse3() 69 const __m128i z_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), z); in xnn_qs8_requantize_q31__ssse3() 70 const __m128i w_neg_mask = _mm_cmpgt_epi32(_mm_setzero_si128(), w); in xnn_qs8_requantize_q31__ssse3() 141 …_mm_add_epi32(_mm_and_si128(x_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), x… in xnn_qs8_requantize_q31__ssse3() 143 …_mm_add_epi32(_mm_and_si128(y_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), y… in xnn_qs8_requantize_q31__ssse3() 145 …_mm_add_epi32(_mm_and_si128(z_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), z… in xnn_qs8_requantize_q31__ssse3() 147 …_mm_add_epi32(_mm_and_si128(w_q31product, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), w… in xnn_qs8_requantize_q31__ssse3() 150 … _mm_sub_epi32(_mm_sra_epi32(x_q31product, vshift), _mm_cmpgt_epi32(x_remainder, vthreshold)); in xnn_qs8_requantize_q31__ssse3() 152 … _mm_sub_epi32(_mm_sra_epi32(y_q31product, vshift), _mm_cmpgt_epi32(y_remainder, vthreshold)); in xnn_qs8_requantize_q31__ssse3() [all …]
|