/external/XNNPACK/src/qu8-igemm/gen/ |
D | 1x32c4-minmax-rndnu-neondot.c | 146 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot() local 147 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot() 148 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot() 149 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot() 150 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot() 151 int32x4_t vacc0xGHIJ = vreinterpretq_s32_u32(vsubq_u32(vpacc0xGHIJ, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot() 152 int32x4_t vacc0xKLMN = vreinterpretq_s32_u32(vsubq_u32(vpacc0xKLMN, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot() 153 int32x4_t vacc0xOPQR = vreinterpretq_s32_u32(vsubq_u32(vpacc0xOPQR, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot() 154 int32x4_t vacc0xSTUV = vreinterpretq_s32_u32(vsubq_u32(vpacc0xSTUV, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot()
|
D | 1x16c4-minmax-fp32-neondot.c | 119 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot() local 120 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot() 121 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot() 122 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot() 123 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot()
|
D | 1x16c4-minmax-rndnu-neondot.c | 118 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot() local 119 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot() 120 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot() 121 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot() 122 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot()
|
D | 2x32c4-minmax-rndnu-neondot.c | 191 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot() local 192 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot() 193 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot() 194 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot() 195 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot() 196 int32x4_t vacc0xGHIJ = vreinterpretq_s32_u32(vsubq_u32(vpacc0xGHIJ, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot() 197 int32x4_t vacc0xKLMN = vreinterpretq_s32_u32(vsubq_u32(vpacc0xKLMN, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot() 198 int32x4_t vacc0xOPQR = vreinterpretq_s32_u32(vsubq_u32(vpacc0xOPQR, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot() 199 int32x4_t vacc0xSTUV = vreinterpretq_s32_u32(vsubq_u32(vpacc0xSTUV, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot()
|
D | 2x16c4-minmax-fp32-neondot.c | 148 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() local 149 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() 150 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() 151 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() 152 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot()
|
D | 1x8c4-minmax-rndnu-neondot.c | 104 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot() local 105 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot() 106 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot()
|
D | 2x16c4-minmax-rndnu-neondot.c | 147 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot() local 148 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot() 149 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot() 150 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot() 151 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot()
|
D | 3x32c4-minmax-rndnu-neondot.c | 236 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot() local 237 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot() 238 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot() 239 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot() 240 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot() 241 int32x4_t vacc0xGHIJ = vreinterpretq_s32_u32(vsubq_u32(vpacc0xGHIJ, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot() 242 int32x4_t vacc0xKLMN = vreinterpretq_s32_u32(vsubq_u32(vpacc0xKLMN, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot() 243 int32x4_t vacc0xOPQR = vreinterpretq_s32_u32(vsubq_u32(vpacc0xOPQR, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot() 244 int32x4_t vacc0xSTUV = vreinterpretq_s32_u32(vsubq_u32(vpacc0xSTUV, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot()
|
D | 2x8c4-minmax-rndnu-neondot.c | 125 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot() local 126 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot() 127 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot()
|
D | 3x16c4-minmax-rndnu-neondot.c | 176 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() local 177 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 178 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 179 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 180 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot()
|
D | 3x8c4-minmax-rndnu-neondot.c | 146 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot() local 147 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot() 148 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot()
|
D | 4x16c4-minmax-fp32-neondot.c | 206 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() local 207 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 208 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 209 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 210 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
|
D | 4x16c4-minmax-rndnu-neondot.c | 205 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() local 206 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 207 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 208 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 209 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
|
/external/XNNPACK/src/qu8-gemm/gen/ |
D | 1x32c4-minmax-rndnu-neondot.c | 134 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x32c4__neondot() local 135 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x32c4__neondot() 136 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x32c4__neondot() 137 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x32c4__neondot() 138 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x32c4__neondot() 139 int32x4_t vacc0xGHIJ = vreinterpretq_s32_u32(vsubq_u32(vpacc0xGHIJ, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x32c4__neondot() 140 int32x4_t vacc0xKLMN = vreinterpretq_s32_u32(vsubq_u32(vpacc0xKLMN, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x32c4__neondot() 141 int32x4_t vacc0xOPQR = vreinterpretq_s32_u32(vsubq_u32(vpacc0xOPQR, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x32c4__neondot() 142 int32x4_t vacc0xSTUV = vreinterpretq_s32_u32(vsubq_u32(vpacc0xSTUV, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x32c4__neondot()
|
D | 1x16c4-minmax-rndnu-neondot.c | 106 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot() local 107 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot() 108 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot() 109 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot() 110 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot()
|
D | 1x16c4-minmax-fp32-neondot.c | 107 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot() local 108 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot() 109 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot() 110 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot() 111 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot()
|
D | 2x32c4-minmax-rndnu-neondot.c | 177 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot() local 178 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot() 179 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot() 180 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot() 181 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot() 182 int32x4_t vacc0xGHIJ = vreinterpretq_s32_u32(vsubq_u32(vpacc0xGHIJ, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot() 183 int32x4_t vacc0xKLMN = vreinterpretq_s32_u32(vsubq_u32(vpacc0xKLMN, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot() 184 int32x4_t vacc0xOPQR = vreinterpretq_s32_u32(vsubq_u32(vpacc0xOPQR, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot() 185 int32x4_t vacc0xSTUV = vreinterpretq_s32_u32(vsubq_u32(vpacc0xSTUV, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot()
|
D | 1x8c4-minmax-rndnu-neondot.c | 92 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot() local 93 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot() 94 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot()
|
D | 2x16c4-minmax-fp32-neondot.c | 134 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_gemm_minmax_fp32_ukernel_2x16c4__neondot() local 135 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_2x16c4__neondot() 136 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_2x16c4__neondot() 137 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_2x16c4__neondot() 138 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_2x16c4__neondot()
|
D | 2x16c4-minmax-rndnu-neondot.c | 133 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot() local 134 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot() 135 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot() 136 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot() 137 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot()
|
D | 3x32c4-minmax-rndnu-neondot.c | 220 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot() local 221 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot() 222 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot() 223 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot() 224 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot() 225 int32x4_t vacc0xGHIJ = vreinterpretq_s32_u32(vsubq_u32(vpacc0xGHIJ, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot() 226 int32x4_t vacc0xKLMN = vreinterpretq_s32_u32(vsubq_u32(vpacc0xKLMN, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot() 227 int32x4_t vacc0xOPQR = vreinterpretq_s32_u32(vsubq_u32(vpacc0xOPQR, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot() 228 int32x4_t vacc0xSTUV = vreinterpretq_s32_u32(vsubq_u32(vpacc0xSTUV, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot()
|
D | 2x8c4-minmax-rndnu-neondot.c | 111 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x8c4__neondot() local 112 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x8c4__neondot() 113 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x8c4__neondot()
|
D | 3x16c4-minmax-rndnu-neondot.c | 160 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() local 161 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 162 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 163 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 164 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot()
|
D | 3x8c4-minmax-rndnu-neondot.c | 130 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x8c4__neondot() local 131 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x8c4__neondot() 132 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x8c4__neondot()
|
D | 4x16c4-minmax-fp32-neondot.c | 188 const uint32x4_t vnacc0x0123 = vcombine_u32(vnacc0, vnacc0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() local 189 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 190 int32x4_t vacc0x4567 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x4567, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 191 int32x4_t vacc0x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc0x89AB, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 192 int32x4_t vacc0xCDEF = vreinterpretq_s32_u32(vsubq_u32(vpacc0xCDEF, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
|