/external/XNNPACK/src/qs8-gemm/gen/ |
D | 8x16c4-minmax-rndnu-neondot.c | 140 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() local 150 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 154 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 158 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 162 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 166 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 170 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 174 vacc6xCDEF = vdotq_lane_s32(vacc6xCDEF, vb0123xCDEF, va6x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 178 vacc7xCDEF = vdotq_lane_s32(vacc7xCDEF, vb0123xCDEF, va7x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 230 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() local [all …]
|
D | 6x16c4-minmax-rndnu-neondot.c | 118 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() local 128 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 132 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 136 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 140 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 144 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 148 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 190 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() local 196 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 200 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() [all …]
|
D | 4x16c4-minmax-rndnu-neondot.c | 96 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() local 106 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 110 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 114 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 118 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 150 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() local 156 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 160 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 164 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 168 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 8x16c4-minmax-fp32-neondot.c | 163 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() local 173 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() 177 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() 181 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() 185 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() 189 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() 193 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() 197 vacc6xCDEF = vdotq_lane_s32(vacc6xCDEF, vb0123xCDEF, va6x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() 201 vacc7xCDEF = vdotq_lane_s32(vacc7xCDEF, vb0123xCDEF, va7x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() 253 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() local [all …]
|
D | 6x16c4-minmax-fp32-neondot.c | 137 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() local 147 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() 151 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() 155 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() 159 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() 163 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() 167 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() 209 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() local 215 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() 219 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() [all …]
|
D | 4x16c4-minmax-fp32-neondot.c | 111 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot() local 121 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 125 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 129 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 133 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 165 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot() local 171 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 175 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 179 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 183 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 8x16c4-minmax-fp32-neondot.c | 141 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() local 151 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() 155 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() 159 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() 163 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() 167 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() 171 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() 175 vacc6xCDEF = vdotq_lane_s32(vacc6xCDEF, vb0123xCDEF, va6x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() 179 vacc7xCDEF = vdotq_lane_s32(vacc7xCDEF, vb0123xCDEF, va7x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() 231 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() local [all …]
|
D | 6x16c4-minmax-fp32-neondot.c | 119 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() local 129 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() 133 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() 137 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() 141 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() 145 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() 149 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() 191 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() local 197 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() 201 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() [all …]
|
D | 4x16c4-minmax-fp32-neondot.c | 97 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot() local 107 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 111 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 115 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 119 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 151 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot() local 157 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 161 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 165 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 169 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 8x16c4-minmax-rndnu-neondot.c | 162 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() local 172 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 176 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 180 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 184 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 188 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 192 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 196 vacc6xCDEF = vdotq_lane_s32(vacc6xCDEF, vb0123xCDEF, va6x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 200 vacc7xCDEF = vdotq_lane_s32(vacc7xCDEF, vb0123xCDEF, va7x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 252 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() local [all …]
|
D | 6x16c4-minmax-rndnu-neondot.c | 136 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() local 146 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 150 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 154 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 158 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 162 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 166 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 208 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() local 214 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 218 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() [all …]
|
D | 4x16c4-minmax-rndnu-neondot.c | 110 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() local 120 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 124 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 128 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 132 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 164 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() local 170 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 174 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 178 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 182 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
|
/external/XNNPACK/src/qu8-igemm/gen/ |
D | 8x16c4-minmax-rndnu-neondot.c | 174 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() local 185 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 194 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 203 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 212 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 221 vpacc4xCDEF = vdotq_lane_u32(vpacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 230 vpacc5xCDEF = vdotq_lane_u32(vpacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 239 vpacc6xCDEF = vdotq_lane_u32(vpacc6xCDEF, vb0123xCDEF, va6x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 248 vpacc7xCDEF = vdotq_lane_u32(vpacc7xCDEF, vb0123xCDEF, va7x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() 272 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() local [all …]
|
D | 5x16c4-minmax-rndnu-neondot.c | 132 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot() local 143 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot() 152 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot() 161 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot() 170 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot() 179 vpacc4xCDEF = vdotq_lane_u32(vpacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot() 200 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot() local 207 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot() 212 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot() 217 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot() [all …]
|
D | 4x16c4-minmax-fp32-neondot.c | 119 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() local 130 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 139 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 148 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 157 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 177 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() local 184 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 189 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 194 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() 199 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
|
D | 4x16c4-minmax-rndnu-neondot.c | 118 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() local 129 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 138 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 147 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 156 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 176 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() local 183 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 188 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 193 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() 198 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
|
D | 6x16c4-minmax-rndnu-neondot.c | 146 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() local 157 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 166 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 175 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 184 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 193 vpacc4xCDEF = vdotq_lane_u32(vpacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 202 vpacc5xCDEF = vdotq_lane_u32(vpacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 224 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() local 231 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() 236 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() [all …]
|
D | 3x16c4-minmax-rndnu-neondot.c | 104 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() local 115 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 124 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 133 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 152 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() local 159 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 164 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 169 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot()
|
D | 2x16c4-minmax-fp32-neondot.c | 91 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() local 102 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() 111 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() 129 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() local 136 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() 141 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot()
|
/external/XNNPACK/src/qu8-gemm/gen/ |
D | 8x16c4-minmax-rndnu-neondot.c | 150 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() local 161 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 170 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 179 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 188 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 197 vpacc4xCDEF = vdotq_lane_u32(vpacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 206 vpacc5xCDEF = vdotq_lane_u32(vpacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 215 vpacc6xCDEF = vdotq_lane_u32(vpacc6xCDEF, vb0123xCDEF, va6x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 224 vpacc7xCDEF = vdotq_lane_u32(vpacc7xCDEF, vb0123xCDEF, va7x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() 248 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() local [all …]
|
D | 5x16c4-minmax-rndnu-neondot.c | 114 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot() local 125 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot() 134 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot() 143 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot() 152 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot() 161 vpacc4xCDEF = vdotq_lane_u32(vpacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot() 182 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot() local 189 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot() 194 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot() 199 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot() [all …]
|
D | 4x16c4-minmax-fp32-neondot.c | 103 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() local 114 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 123 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 132 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 141 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 161 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() local 168 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 173 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 178 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() 183 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
|
D | 4x16c4-minmax-rndnu-neondot.c | 102 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() local 113 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 122 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 131 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 140 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 160 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() local 167 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 172 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 177 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() 182 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
|
D | 6x16c4-minmax-rndnu-neondot.c | 126 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() local 137 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 146 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 155 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 164 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 173 vpacc4xCDEF = vdotq_lane_u32(vpacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 182 vpacc5xCDEF = vdotq_lane_u32(vpacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 204 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() local 211 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() 216 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() [all …]
|
D | 3x16c4-minmax-rndnu-neondot.c | 90 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() local 101 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 110 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 119 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 138 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() local 145 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 150 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 155 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot()
|