Home
last modified time | relevance | path

Searched refs:va2c2 (Results 1 – 25 of 178) sorted by relevance

12345678

/external/XNNPACK/src/qs8-gemm/gen/
D3x16c2-minmax-rndnu-neon-mull-dup.c151 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local
155 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
161 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
167 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
173 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
305 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local
306 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
308 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
310 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
312 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
D3x16c2-minmax-rndnu-neon-mull-ld2r.c154 const int8x8_t va2c2 = vreinterpret_s8_s16(va21.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local
158 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
164 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
170 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
176 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
308 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local
309 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
311 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
313 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
315 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
D3x16c2-minmax-rndnu-neon-mull-ld4r.c151 const int8x8_t va2c2 = vreinterpret_s8_s16(va2.val[2]); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local
155 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
161 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
167 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
173 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
305 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local
306 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
308 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
310 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
312 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
D3x16c2-minmax-rndnu-neon-mull-ld1r.c160 const int8x8_t va2c2 = vreinterpret_s8_s16(va22); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local
164 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
170 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
176 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
182 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
314 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local
315 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
317 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
319 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
321 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
D3x8c2-minmax-rndnu-neon-mull-ld1r.c122 const int8x8_t va2c2 = vreinterpret_s8_s16(va22); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() local
126 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r()
132 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r()
214 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() local
215 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r()
217 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r()
D3x8c2-minmax-rndnu-neon-mull-ld4r.c113 const int8x8_t va2c2 = vreinterpret_s8_s16(va2.val[2]); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() local
117 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r()
123 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r()
205 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() local
206 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r()
208 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r()
D3x8c2-minmax-rndnu-neon-mull-dup.c113 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local
117 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup()
123 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup()
205 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local
206 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup()
208 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup()
D3x8c2-minmax-rndnu-neon-mull-ld2r.c116 const int8x8_t va2c2 = vreinterpret_s8_s16(va21.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() local
120 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r()
126 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r()
208 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() local
209 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r()
211 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r()
D4x16c2-minmax-rndnu-neon-mull-ld2r.c184 const int8x8_t va2c2 = vreinterpret_s8_s16(va21.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local
189 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
197 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
205 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
213 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
375 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local
376 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
378 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
380 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
382 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
D4x16c2-minmax-rndnu-neon-mull-ld1r.c192 const int8x8_t va2c2 = vreinterpret_s8_s16(va22); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local
197 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
205 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
213 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
221 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
383 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local
384 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
386 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
388 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
390 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
D4x16c2-minmax-rndnu-neon-mull-dup.c180 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local
185 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
193 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
201 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
209 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
371 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local
372 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
374 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
376 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
378 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
D4x16c2-minmax-rndnu-neon-mull-ld4r.c180 const int8x8_t va2c2 = vreinterpret_s8_s16(va2.val[2]); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local
185 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
193 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
201 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
209 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
371 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local
372 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
374 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
376 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
378 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
/external/XNNPACK/src/qs8-igemm/gen/
D3x16c2-minmax-rndnu-neon-mull-ld1r.c175 const int8x8_t va2c2 = vreinterpret_s8_s16(va22); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local
179 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
185 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
191 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
197 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
329 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local
330 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
332 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
334 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
336 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
D3x16c2-minmax-rndnu-neon-mull-dup.c166 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local
170 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
176 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
182 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
188 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
320 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local
321 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
323 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
325 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
327 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
D3x16c2-minmax-rndnu-neon-mull-ld4r.c166 const int8x8_t va2c2 = vreinterpret_s8_s16(va2.val[2]); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local
170 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
176 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
182 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
188 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
320 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local
321 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
323 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
325 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
327 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
D3x16c2-minmax-rndnu-neon-mull-ld2r.c169 const int8x8_t va2c2 = vreinterpret_s8_s16(va21.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local
173 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
179 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
185 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
191 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
323 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local
324 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
326 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
328 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
330 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
D3x8c2-minmax-rndnu-neon-mull-dup.c128 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local
132 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup()
138 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup()
220 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local
221 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup()
223 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup()
D3x8c2-minmax-rndnu-neon-mull-ld2r.c131 const int8x8_t va2c2 = vreinterpret_s8_s16(va21.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() local
135 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r()
141 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r()
223 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() local
224 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r()
226 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r()
D3x8c2-minmax-rndnu-neon-mull-ld4r.c128 const int8x8_t va2c2 = vreinterpret_s8_s16(va2.val[2]); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() local
132 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r()
138 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r()
220 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() local
221 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r()
223 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r()
D3x8c2-minmax-rndnu-neon-mull-ld1r.c137 const int8x8_t va2c2 = vreinterpret_s8_s16(va22); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() local
141 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r()
147 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r()
229 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() local
230 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r()
232 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r()
D4x16c2-minmax-rndnu-neon-mull-dup.c197 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local
202 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
210 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
218 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
226 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
388 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local
389 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
391 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
393 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
395 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
D4x16c2-minmax-rndnu-neon-mull-ld4r.c197 const int8x8_t va2c2 = vreinterpret_s8_s16(va2.val[2]); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local
202 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
210 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
218 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
226 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
388 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local
389 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
391 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
393 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
395 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
D4x16c2-minmax-rndnu-neon-mull-ld2r.c201 const int8x8_t va2c2 = vreinterpret_s8_s16(va21.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local
206 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
214 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
222 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
230 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
392 const int8x8_t va2c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2), 2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local
393 const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
395 const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
397 const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
399 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
/external/XNNPACK/src/f32-gemm/gen/
D3x8-wasmsimd-splat.c99 const v128_t va2c2 = wasm_v32x4_shuffle(va2, va2, 2, 2, 2, 2); in xnn_f32_gemm_ukernel_3x8__wasmsimd_splat() local
106 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2c2, vb0123c2)); in xnn_f32_gemm_ukernel_3x8__wasmsimd_splat()
109 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2c2, vb4567c2)); in xnn_f32_gemm_ukernel_3x8__wasmsimd_splat()
D3x8-relu-wasmrelaxedsimd-fma-splat.c99 const v128_t va2c2 = wasm_v32x4_shuffle(va2, va2, 2, 2, 2, 2); in xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat() local
106 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2c2, vb0123c2)); in xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat()
109 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2c2, vb4567c2)); in xnn_f32_gemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat()

12345678