/external/XNNPACK/src/qc8-dwconv/gen/ |
D | up24x25-minmax-fp32-sse41-mul32.c | 187 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 188 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 189 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 190 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 191 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_mullo_epi32(vi0xGHIJ, vk0xGHIJ)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 192 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_mullo_epi32(vi0xKLMN, vk0xKLMN)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 208 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 209 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 210 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 211 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() [all …]
|
D | up24x25-minmax-fp32-avx-mul32.c | 187 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 188 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 189 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 190 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 191 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_mullo_epi32(vi0xGHIJ, vk0xGHIJ)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 192 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_mullo_epi32(vi0xKLMN, vk0xKLMN)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 208 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 209 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 210 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 211 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() [all …]
|
D | up16x25-minmax-fp32-sse41-mul32.c | 181 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 182 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 183 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 184 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 196 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 197 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 198 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 199 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 211 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 212 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() [all …]
|
D | up16x25-minmax-fp32-avx-mul32.c | 181 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 182 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 183 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 184 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 196 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 197 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 198 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 199 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 211 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 212 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() [all …]
|
D | up8x25-minmax-fp32-sse41-mul32.c | 175 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 176 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 184 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 185 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 193 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 194 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 202 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi3x0123, vk3x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 203 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi3x4567, vk3x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 211 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi4x0123, vk4x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 212 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi4x4567, vk4x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() [all …]
|
D | up8x25-minmax-fp32-avx-mul32.c | 175 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 176 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 184 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 185 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 193 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 194 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 202 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi3x0123, vk3x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 203 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi3x4567, vk3x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 211 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi4x0123, vk4x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 212 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi4x4567, vk4x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() [all …]
|
D | up24x9-minmax-fp32-sse41-mul32.c | 107 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 108 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 109 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 110 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 111 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_mullo_epi32(vi0xGHIJ, vk0xGHIJ)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 112 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_mullo_epi32(vi0xKLMN, vk0xKLMN)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 128 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 129 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 130 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 131 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() [all …]
|
D | up24x9-minmax-fp32-avx-mul32.c | 107 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 108 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 109 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 110 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 111 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_mullo_epi32(vi0xGHIJ, vk0xGHIJ)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 112 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_mullo_epi32(vi0xKLMN, vk0xKLMN)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 128 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 129 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 130 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 131 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() [all …]
|
D | up16x9-minmax-fp32-sse41-mul32.c | 101 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 102 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 103 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 104 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 116 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 117 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 118 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 119 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 131 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 132 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() [all …]
|
D | up16x9-minmax-fp32-avx-mul32.c | 101 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 102 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 103 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 104 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 116 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 117 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 118 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 119 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 131 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 132 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() [all …]
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up24x25-minmax-fp32-sse41-mul32.c | 187 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 188 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 189 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 190 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 191 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_mullo_epi32(vi0xGHIJ, vk0xGHIJ)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 192 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_mullo_epi32(vi0xKLMN, vk0xKLMN)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 208 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 209 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 210 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() 211 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32() [all …]
|
D | up24x25-minmax-fp32-avx-mul32.c | 187 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 188 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 189 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 190 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 191 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_mullo_epi32(vi0xGHIJ, vk0xGHIJ)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 192 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_mullo_epi32(vi0xKLMN, vk0xKLMN)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 208 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 209 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 210 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() 211 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32() [all …]
|
D | up16x25-minmax-fp32-sse41-mul32.c | 181 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 182 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 183 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 184 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 196 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 197 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 198 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 199 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 211 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 212 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() [all …]
|
D | up16x25-minmax-fp32-avx-mul32.c | 181 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 182 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 183 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 184 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 196 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 197 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 198 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 199 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 211 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 212 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() [all …]
|
D | up8x25-minmax-fp32-sse41-mul32.c | 175 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 176 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 184 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 185 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 193 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 194 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 202 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi3x0123, vk3x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 203 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi3x4567, vk3x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 211 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi4x0123, vk4x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 212 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi4x4567, vk4x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() [all …]
|
D | up8x25-minmax-fp32-avx-mul32.c | 175 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 176 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 184 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 185 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 193 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 194 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 202 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi3x0123, vk3x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 203 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi3x4567, vk3x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 211 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi4x0123, vk4x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 212 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi4x4567, vk4x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() [all …]
|
D | up24x9-minmax-fp32-avx-mul32.c | 107 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 108 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 109 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 110 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 111 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_mullo_epi32(vi0xGHIJ, vk0xGHIJ)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 112 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_mullo_epi32(vi0xKLMN, vk0xKLMN)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 128 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 129 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 130 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() 131 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32() [all …]
|
D | up24x9-minmax-fp32-sse41-mul32.c | 107 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 108 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 109 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 110 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 111 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_mullo_epi32(vi0xGHIJ, vk0xGHIJ)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 112 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_mullo_epi32(vi0xKLMN, vk0xKLMN)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 128 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 129 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 130 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() 131 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32() [all …]
|
D | up16x9-minmax-fp32-avx-mul32.c | 101 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 102 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 103 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 104 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 116 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 117 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 118 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 119 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 131 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 132 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() [all …]
|
D | up16x9-minmax-fp32-sse41-mul32.c | 101 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 102 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 103 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 104 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 116 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 117 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 118 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 119 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 131 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() 132 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32() [all …]
|
/external/XNNPACK/src/qu8-dwconv/gen/ |
D | up16x25-minmax-fp32-sse41-mul32.c | 182 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 183 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 184 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 185 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 197 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 198 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 199 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 200 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 212 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() 213 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32() [all …]
|
D | up16x25-minmax-fp32-avx-mul32.c | 182 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 183 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 184 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 185 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 197 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 198 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 199 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 200 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 212 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() 213 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32() [all …]
|
D | up8x25-minmax-fp32-avx-mul32.c | 176 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 177 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 185 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 186 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 194 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 195 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 203 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi3x0123, vk3x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 204 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi3x4567, vk3x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 212 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi4x0123, vk4x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() 213 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi4x4567, vk4x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32() [all …]
|
D | up8x25-minmax-fp32-sse41-mul32.c | 176 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 177 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 185 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 186 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 194 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 195 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 203 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi3x0123, vk3x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 204 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi3x4567, vk3x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 212 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi4x0123, vk4x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() 213 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi4x4567, vk4x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32() [all …]
|
D | up16x9-minmax-fp32-avx-mul32.c | 102 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi0x0123, vk0x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 103 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi0x4567, vk0x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 104 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi0x89AB, vk0x89AB)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 105 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi0xCDEF, vk0xCDEF)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 117 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi1x0123, vk1x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 118 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi1x4567, vk1x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 119 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vi1x89AB, vk1x89AB)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 120 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vi1xCDEF, vk1xCDEF)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 132 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vi2x0123, vk2x0123)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() 133 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vi2x4567, vk2x4567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32() [all …]
|