/external/XNNPACK/src/qs8-igemm/gen/
D | 6x16c4-minmax-neondot.c | matches in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot():
    130  const int8x8_t va5x01234567 = vld1_s8(a5); a5 += 8;  (local)
    163  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb0123x0123, va5x01234567, 0);
    164  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    165  vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0);
    166  vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0);
    187  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    188  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb4567x4567, va5x01234567, 1);
    189  vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb4567x89AB, va5x01234567, 1);
    190  vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb4567xCDEF, va5x01234567, 1);
    202  const int8x8_t va5x01234567 = vld1_s8(a5);  (local)
    [all …]
D | 6x8c4-minmax-neondot.c | matches in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot():
    118  const int8x8_t va5x01234567 = vld1_s8(a5); a5 += 8;  (local)
    137  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb0123x0123, va5x01234567, 0);
    138  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    149  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    150  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb4567x4567, va5x01234567, 1);
    162  const int8x8_t va5x01234567 = vld1_s8(a5);  (local)
    179  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb0123x0123, va5x01234567, 0);
    180  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
D | 8x16c4-minmax-neondot.c | matches in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot():
    154  const int8x8_t va5x01234567 = vld1_s8(a5); a5 += 8;  (local)
    189  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb0123x0123, va5x01234567, 0);
    190  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    191  vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0);
    192  vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0);
    221  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    222  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb4567x4567, va5x01234567, 1);
    223  vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb4567x89AB, va5x01234567, 1);
    224  vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb4567xCDEF, va5x01234567, 1);
    244  const int8x8_t va5x01234567 = vld1_s8(a5);  (local)
    [all …]
D | 8x8c4-minmax-neondot.c | matches in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot():
    138  const int8x8_t va5x01234567 = vld1_s8(a5); a5 += 8;  (local)
    159  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb0123x0123, va5x01234567, 0);
    160  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    175  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    176  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb4567x4567, va5x01234567, 1);
    192  const int8x8_t va5x01234567 = vld1_s8(a5);  (local)
    211  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb0123x0123, va5x01234567, 0);
    212  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
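Every match above follows the same accumulation scheme: va5x01234567 holds eight consecutive signed 8-bit values of row 5 of the input, and each vdotq_lane_s32 call adds four 4-element dot products against one 16-byte block of packed weights into an int32x4_t of row-5 accumulators, with lane 0 consuming a[k..k+3] and lane 1 consuming a[k+4..k+7]. The sketch below is a minimal single-row, four-column illustration of that pattern, not the generated kernels; the function name, the zero-initialized accumulator, and the assumed weight packing (16 bytes per group of four k values) are illustrative assumptions.

    #include <arm_neon.h>
    #include <stddef.h>
    #include <stdint.h>

    #if defined(__ARM_FEATURE_DOTPROD)
    // Hypothetical helper, not part of XNNPACK: one row of A against four packed
    // weight columns; kc is assumed to be a multiple of 8.
    static void row_dot_c4(size_t kc, const int8_t* a, const int8_t* packed_w, int32_t* out) {
      int32x4_t vacc = vdupq_n_s32(0);                      // real kernels start from the packed bias
      for (size_t k = 0; k < kc; k += 8) {
        const int8x8_t va = vld1_s8(a); a += 8;             // eight k values of this row
        const int8x16_t vb0123 = vld1q_s8(packed_w);        // k+0..k+3 for columns 0..3
        const int8x16_t vb4567 = vld1q_s8(packed_w + 16);   // k+4..k+7 for columns 0..3
        packed_w += 32;
        vacc = vdotq_lane_s32(vacc, vb0123, va, 0);         // lane 0 selects a[k..k+3]
        vacc = vdotq_lane_s32(vacc, vb4567, va, 1);         // lane 1 selects a[k+4..k+7]
      }
      vst1q_s32(out, vacc);                                 // raw int32 sums; requantization omitted
    }
    #endif  // __ARM_FEATURE_DOTPROD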
/external/XNNPACK/src/qs8-gemm/gen/
D | 6x16c4-minmax-neondot.c | matches in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot():
    111  const int8x8_t va5x01234567 = vld1_s8(a5); a5 += 8;  (local)
    144  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb0123x0123, va5x01234567, 0);
    145  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    146  vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0);
    147  vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0);
    168  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    169  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb4567x4567, va5x01234567, 1);
    170  vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb4567x89AB, va5x01234567, 1);
    171  vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb4567xCDEF, va5x01234567, 1);
    183  const int8x8_t va5x01234567 = vld1_s8(a5); a5 += 4;  (local)
    [all …]
D | 6x8c4-minmax-neondot.c | matches in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot():
    99   const int8x8_t va5x01234567 = vld1_s8(a5); a5 += 8;  (local)
    118  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb0123x0123, va5x01234567, 0);
    119  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    130  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    131  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb4567x4567, va5x01234567, 1);
    143  const int8x8_t va5x01234567 = vld1_s8(a5); a5 += 4;  (local)
    160  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb0123x0123, va5x01234567, 0);
    161  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
D | 8x16c4-minmax-neondot.c | matches in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot():
    131  const int8x8_t va5x01234567 = vld1_s8(a5); a5 += 8;  (local)
    166  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb0123x0123, va5x01234567, 0);
    167  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    168  vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0);
    169  vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0);
    198  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    199  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb4567x4567, va5x01234567, 1);
    200  vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb4567x89AB, va5x01234567, 1);
    201  vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb4567xCDEF, va5x01234567, 1);
    221  const int8x8_t va5x01234567 = vld1_s8(a5); a5 += 4;  (local)
    [all …]
D | 8x8c4-minmax-neondot.c | matches in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot():
    115  const int8x8_t va5x01234567 = vld1_s8(a5); a5 += 8;  (local)
    136  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb0123x0123, va5x01234567, 0);
    137  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    152  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    153  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb4567x4567, va5x01234567, 1);
    169  const int8x8_t va5x01234567 = vld1_s8(a5); a5 += 4;  (local)
    188  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb0123x0123, va5x01234567, 0);
    189  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
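The remainder blocks differ between the two directories only in how the row pointer is advanced: the GEMM tails bump the pointer by 4 after the final load (a5 += 4), while the IGEMM tails reload through a5 without advancing it. In both cases the tail still issues the lane-0 dot products against the first packed-weight block. Below is a hedged sketch of such a tail step under the same assumptions as above; row_dot_c4_tail, its parameters, and the k_remainder > 4 guard for the lane-1 group are illustrative (the truncated listings do not show how the generated code guards that group).

    #include <arm_neon.h>
    #include <stddef.h>
    #include <stdint.h>

    #if defined(__ARM_FEATURE_DOTPROD)
    // Hypothetical tail helper, not part of XNNPACK: handles 1..7 leftover k values.
    // Assumes the row is padded so that a full 8-byte load past the last valid k value
    // is safe, which is what the unconditional vld1_s8 in the remainder blocks implies.
    static int32x4_t row_dot_c4_tail(size_t k_remainder, const int8_t* a,
                                     const int8_t* packed_w, int32x4_t vacc) {
      const int8x8_t va = vld1_s8(a);                      // full 8-byte load over the padded tail
      const int8x16_t vb0123 = vld1q_s8(packed_w);         // first group: k+0..k+3, columns 0..3
      vacc = vdotq_lane_s32(vacc, vb0123, va, 0);
      if (k_remainder > 4) {                               // assumed guard for the second group
        const int8x16_t vb4567 = vld1q_s8(packed_w + 16);  // k+4..k+7, columns 0..3
        vacc = vdotq_lane_s32(vacc, vb4567, va, 1);
      }
      return vacc;
    }
    #endif  // __ARM_FEATURE_DOTPROD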