Lines Matching refs:vget_high_s16
122 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
124 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
126 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
128 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
130 vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa4), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
132 vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa5), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
134 vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa6), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
136 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa7), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
142 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
144 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
146 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa2), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
148 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa3), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
150 vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa4), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
152 vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa5), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
154 vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa6), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
156 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa7), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
162 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
164 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
166 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa2), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
168 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa3), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
170 vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa4), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
172 vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa5), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
174 vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa6), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
176 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa7), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
182 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa0), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
184 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa1), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
186 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa2), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
188 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa3), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
190 vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa4), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
192 vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa5), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
194 vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa6), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
196 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa7), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
201 vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa0), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
202 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa0), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
203 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa1), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
204 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa1), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
205 vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa2), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
206 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa2), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
207 vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa3), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
208 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa3), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
209 vacc4x0123 = vmlal_lane_s16(vacc4x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa4), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
210 vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa4), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
211 vacc5x0123 = vmlal_lane_s16(vacc5x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa5), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
212 vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa5), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
213 vacc6x0123 = vmlal_lane_s16(vacc6x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa6), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
214 vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa6), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
215 vacc7x0123 = vmlal_lane_s16(vacc7x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa7), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
216 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa7), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
221 vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa0), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
222 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa0), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
223 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa1), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
224 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa1), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
225 vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa2), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
226 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa2), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
227 vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa3), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
228 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa3), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
229 vacc4x0123 = vmlal_lane_s16(vacc4x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa4), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
230 vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa4), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
231 vacc5x0123 = vmlal_lane_s16(vacc5x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa5), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
232 vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa5), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
233 vacc6x0123 = vmlal_lane_s16(vacc6x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa6), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
234 vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa6), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
235 vacc7x0123 = vmlal_lane_s16(vacc7x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa7), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
236 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa7), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
241 vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa0), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
242 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa0), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
243 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa1), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
244 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa1), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
245 vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa2), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
246 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa2), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
247 vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa3), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
248 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa3), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
249 vacc4x0123 = vmlal_lane_s16(vacc4x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa4), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
250 vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa4), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
251 vacc5x0123 = vmlal_lane_s16(vacc5x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa5), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
252 vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa5), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
253 vacc6x0123 = vmlal_lane_s16(vacc6x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa6), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
254 vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa6), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
255 vacc7x0123 = vmlal_lane_s16(vacc7x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa7), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
256 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa7), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
261 vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c7), vget_high_s16(vxa0), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
262 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa0), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
263 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c7), vget_high_s16(vxa1), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
264 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa1), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
265 vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567c7), vget_high_s16(vxa2), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
266 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa2), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
267 vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567c7), vget_high_s16(vxa3), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
268 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa3), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
269 vacc4x0123 = vmlal_lane_s16(vacc4x0123, vget_low_s16(vxb01234567c7), vget_high_s16(vxa4), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
270 vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa4), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
271 vacc5x0123 = vmlal_lane_s16(vacc5x0123, vget_low_s16(vxb01234567c7), vget_high_s16(vxa5), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
272 vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa5), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
273 vacc6x0123 = vmlal_lane_s16(vacc6x0123, vget_low_s16(vxb01234567c7), vget_high_s16(vxa6), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
274 vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa6), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
275 vacc7x0123 = vmlal_lane_s16(vacc7x0123, vget_low_s16(vxb01234567c7), vget_high_s16(vxa7), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
276 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa7), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
302 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
304 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
306 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
308 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
310 vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa4), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
312 vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa5), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
314 vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa6), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
316 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa7), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
323 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
325 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
327 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa2), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
329 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa3), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
331 … vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa4), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
333 … vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa5), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
335 … vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa6), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
337 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa7), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
344 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
346 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
348 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa2), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
350 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa3), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
352 … vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa4), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
354 … vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa5), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
356 … vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa6), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
358 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa7), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
365 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa0), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
367 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa1), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
369 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa2), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
371 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa3), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
373 … vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa4), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
375 … vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa5), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
377 … vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa6), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
379 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa7), 3); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
385 … vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa0), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
386 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa0), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
387 … vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa1), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
388 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa1), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
389 … vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa2), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
390 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa2), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
391 … vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa3), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
392 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa3), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
393 … vacc4x0123 = vmlal_lane_s16(vacc4x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa4), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
394 … vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa4), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
395 … vacc5x0123 = vmlal_lane_s16(vacc5x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa5), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
396 … vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa5), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
397 … vacc6x0123 = vmlal_lane_s16(vacc6x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa6), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
398 … vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa6), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
399 … vacc7x0123 = vmlal_lane_s16(vacc7x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa7), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
400 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa7), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
406 … vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa0), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
407 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa0), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
408 … vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa1), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
409 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa1), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
410 … vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa2), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
411 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa2), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
412 … vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa3), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
413 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa3), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
414 … vacc4x0123 = vmlal_lane_s16(vacc4x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa4), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
415 … vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa4), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
416 … vacc5x0123 = vmlal_lane_s16(vacc5x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa5), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
417 … vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa5), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
418 … vacc6x0123 = vmlal_lane_s16(vacc6x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa6), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
419 … vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa6), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
420 … vacc7x0123 = vmlal_lane_s16(vacc7x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa7), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
421 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa7), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
427 … vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa0), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
428 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa0), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
429 … vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa1), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
430 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa1), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
431 … vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa2), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
432 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa2), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
433 … vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa3), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
434 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa3), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
435 … vacc4x0123 = vmlal_lane_s16(vacc4x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa4), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
436 … vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa4), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
437 … vacc5x0123 = vmlal_lane_s16(vacc5x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa5), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
438 … vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa5), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
439 … vacc6x0123 = vmlal_lane_s16(vacc6x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa6), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
440 … vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa6), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
441 … vacc7x0123 = vmlal_lane_s16(vacc7x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa7), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
442 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa7), 2); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
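Every match above is the same accumulation idiom repeated across rows and weight columns: a 128-bit vector of zero-point-adjusted 16-bit values is split into its low and high 64-bit halves with vget_low_s16()/vget_high_s16(), and vmlal_lane_s16() multiplies one half of the weights by a single activation lane, widening the int16 products into int32 accumulators. The fragment below is a minimal sketch of that idiom only; it is not code taken from the kernel, and the helper name, parameter layout, and the comment about how vxa/vxb are produced are illustrative assumptions.

```c
// Minimal sketch (not from the XNNPACK sources) of the vget_*_s16 +
// vmlal_lane_s16 accumulation step that the matches above repeat.
#include <arm_neon.h>

static inline void accumulate_one_step(
    int16x8_t vxa,        // 8 widened (zero-point-adjusted) activations for one row
    int16x8_t vxb,        // 8 widened weights for one input-channel step
    int32x4_t *vacc0123,  // accumulators for output channels 0..3
    int32x4_t *vacc4567)  // accumulators for output channels 4..7
{
  // In the kernel, vxa/vxb are produced by something like
  //   vreinterpretq_s16_u16(vsubl_u8(va, vzero_point));
  // here they are simply taken as inputs.
  //
  // Multiply all 8 weights by activation lane 0 of the low half of vxa,
  // widening each int16*int16 product into the int32 accumulators.
  *vacc0123 = vmlal_lane_s16(*vacc0123, vget_low_s16(vxb),  vget_low_s16(vxa), 0);
  *vacc4567 = vmlal_lane_s16(*vacc4567, vget_high_s16(vxb), vget_low_s16(vxa), 0);
}
```

The kernel unrolls this step over lanes 0..3 of both vget_low_s16(vxa) and vget_high_s16(vxa) (eight input channels per iteration) and over eight rows of accumulators, which is why the listing shows the same statement shape with only the lane index and register names changing.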