/external/XNNPACK/src/qs8-igemm/gen/ |
D | 6x16c4-minmax-neondot.c | 129 const int8x8_t va4x01234567 = vld1_s8(a4); a4 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local 159 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb0123x0123, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 160 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 161 vacc4x89AB = vdotq_lane_s32(vacc4x89AB, vb0123x89AB, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 162 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 183 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 184 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb4567x4567, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 185 vacc4x89AB = vdotq_lane_s32(vacc4x89AB, vb4567x89AB, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 186 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb4567xCDEF, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 201 const int8x8_t va4x01234567 = vld1_s8(a4); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local [all …]
|
D | 6x8c4-minmax-neondot.c | 117 const int8x8_t va4x01234567 = vld1_s8(a4); a4 += 8; in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() local 135 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb0123x0123, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 136 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 147 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 148 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb4567x4567, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 161 const int8x8_t va4x01234567 = vld1_s8(a4); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() local 177 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb0123x0123, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 178 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 153 const int8x8_t va4x01234567 = vld1_s8(a4); a4 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local 185 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb0123x0123, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 186 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 187 vacc4x89AB = vdotq_lane_s32(vacc4x89AB, vb0123x89AB, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 188 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 217 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 218 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb4567x4567, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 219 vacc4x89AB = vdotq_lane_s32(vacc4x89AB, vb4567x89AB, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 220 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb4567xCDEF, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 243 const int8x8_t va4x01234567 = vld1_s8(a4); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local [all …]
|
D | 8x8c4-minmax-neondot.c | 137 const int8x8_t va4x01234567 = vld1_s8(a4); a4 += 8; in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local 157 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb0123x0123, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 158 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 173 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 174 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb4567x4567, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 191 const int8x8_t va4x01234567 = vld1_s8(a4); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local 209 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb0123x0123, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 210 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 6x16c4-minmax-neondot.c | 110 const int8x8_t va4x01234567 = vld1_s8(a4); a4 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local 140 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb0123x0123, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 141 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 142 vacc4x89AB = vdotq_lane_s32(vacc4x89AB, vb0123x89AB, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 143 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 164 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 165 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb4567x4567, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 166 vacc4x89AB = vdotq_lane_s32(vacc4x89AB, vb4567x89AB, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 167 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb4567xCDEF, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 182 const int8x8_t va4x01234567 = vld1_s8(a4); a4 += 4; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local [all …]
|
D | 6x8c4-minmax-neondot.c | 98 const int8x8_t va4x01234567 = vld1_s8(a4); a4 += 8; in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() local 116 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb0123x0123, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 117 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 128 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 129 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb4567x4567, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 142 const int8x8_t va4x01234567 = vld1_s8(a4); a4 += 4; in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() local 158 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb0123x0123, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 159 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 130 const int8x8_t va4x01234567 = vld1_s8(a4); a4 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local 162 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb0123x0123, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 163 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 164 vacc4x89AB = vdotq_lane_s32(vacc4x89AB, vb0123x89AB, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 165 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 194 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 195 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb4567x4567, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 196 vacc4x89AB = vdotq_lane_s32(vacc4x89AB, vb4567x89AB, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 197 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb4567xCDEF, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 220 const int8x8_t va4x01234567 = vld1_s8(a4); a4 += 4; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local [all …]
|
D | 8x8c4-minmax-neondot.c | 114 const int8x8_t va4x01234567 = vld1_s8(a4); a4 += 8; in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local 134 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb0123x0123, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 135 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 150 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 151 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb4567x4567, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 168 const int8x8_t va4x01234567 = vld1_s8(a4); a4 += 4; in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local 186 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb0123x0123, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 187 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
|