
Searched refs:vout4x01234567_5x01234567 (Results 1 – 10 of 10) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
6x8c4-minmax-neondot.c:218 … int8x16_t vout4x01234567_5x01234567 = vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc5x01234567); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() local
229 …int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x012… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() local
236 vout4x01234567_5x01234567 = vmaxq_s8(vout4x01234567_5x01234567, voutput_min); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
240 vout4x01234567_5x01234567 = vminq_s8(vout4x01234567_5x01234567, voutput_max); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
248 vst1_s8(c4 + 0, vget_low_s8(vout4x01234567_5x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
249 vst1_s8(c5 + 0, vget_high_s8(vout4x01234567_5x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
274 …_lane_u32(__builtin_assume_aligned(c4, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
275 …_lane_u32(__builtin_assume_aligned(c5, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 2); c5… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
278 vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 4); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
285 …_lane_u16(__builtin_assume_aligned(c4, 1), vreinterpretq_u16_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
[all …]
8x8c4-minmax-neondot.c:264 … int8x16_t vout4x01234567_5x01234567 = vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc5x01234567); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local
278 …int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x012… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local
286 vout4x01234567_5x01234567 = vmaxq_s8(vout4x01234567_5x01234567, voutput_min); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
291 vout4x01234567_5x01234567 = vminq_s8(vout4x01234567_5x01234567, voutput_max); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
300 vst1_s8(c4 + 0, vget_low_s8(vout4x01234567_5x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
301 vst1_s8(c5 + 0, vget_high_s8(vout4x01234567_5x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
332 …_lane_u32(__builtin_assume_aligned(c4, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
333 …_lane_u32(__builtin_assume_aligned(c5, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 2); c5… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
338 vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 4); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
346 …_lane_u16(__builtin_assume_aligned(c4, 1), vreinterpretq_u16_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
[all …]
6x16c4-minmax-neondot.c:385 …int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vget_low_s8(vout4x0123456789ABCDEF), vget_low_s8… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local
391 vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567)); c4 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
392 vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567)); c5 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
395 vout4x01234567_5x01234567 = vcombine_s8(vget_high_s8(vout4x0123456789ABCDEF), vget_high_s8(vout5x0… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
402 …_lane_u32(__builtin_assume_aligned(c4, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
403 …_lane_u32(__builtin_assume_aligned(c5, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 2); c5… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
406 vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 4); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
413 …_lane_u16(__builtin_assume_aligned(c4, 1), vreinterpretq_u16_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
414 …_lane_u16(__builtin_assume_aligned(c5, 1), vreinterpretq_u16_s8(vout4x01234567_5x01234567), 4); c5… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
417 vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 2); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
[all …]
8x16c4-minmax-neondot.c:479 …int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vget_low_s8(vout4x0123456789ABCDEF), vget_low_s8… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local
486 vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567)); c4 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
487 vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567)); c5 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
492 vout4x01234567_5x01234567 = vcombine_s8(vget_high_s8(vout4x0123456789ABCDEF), vget_high_s8(vout5x0… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
500 …_lane_u32(__builtin_assume_aligned(c4, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
501 …_lane_u32(__builtin_assume_aligned(c5, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 2); c5… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
506 vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 4); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
514 …_lane_u16(__builtin_assume_aligned(c4, 1), vreinterpretq_u16_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
515 …_lane_u16(__builtin_assume_aligned(c5, 1), vreinterpretq_u16_s8(vout4x01234567_5x01234567), 4); c5… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
520 vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 2); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
[all …]
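
The qs8-gemm hits above all come from one output-packing idiom in the generated kernels: two rows of int16 accumulators (vacc4x01234567, vacc5x01234567) are saturated down to int8, packed into a single int8x16_t with row 4 in the low half and row 5 in the high half, clamped to the output range, and written through the two row pointers c4 and c5. Below is a minimal standalone sketch of that idiom; it is not the generated XNNPACK source, and the helper name and free-standing signature are illustrative only.

#include <arm_neon.h>

/* Sketch, assuming int16 accumulators for rows 4 and 5 and byte pointers to
 * those output rows. Not the XNNPACK kernel itself. */
static void narrow_clamp_store_two_rows(
    int16x8_t vacc4x01234567, int16x8_t vacc5x01234567,
    int8_t* c4, int8_t* c5,
    int8x16_t voutput_min, int8x16_t voutput_max)
{
#if defined(__aarch64__)
  /* AArch64 path: saturating narrow of row 5 straight into the high half. */
  int8x16_t vout4x01234567_5x01234567 =
      vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc5x01234567);
#else
  /* AArch32 path: narrow each row separately, then combine the halves. */
  int8x16_t vout4x01234567_5x01234567 =
      vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x01234567));
#endif
  /* Clamp to the requested output range (the "minmax" in the kernel names). */
  vout4x01234567_5x01234567 = vmaxq_s8(vout4x01234567_5x01234567, voutput_min);
  vout4x01234567_5x01234567 = vminq_s8(vout4x01234567_5x01234567, voutput_max);
  /* Full store: 8 bytes of row 4 from the low half, 8 bytes of row 5 from the high half. */
  vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567));
  vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567));
}
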
/external/XNNPACK/src/qs8-igemm/gen/
6x8c4-minmax-neondot.c:238 … int8x16_t vout4x01234567_5x01234567 = vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc5x01234567); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() local
249 …int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x012… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() local
254 vout4x01234567_5x01234567 = vmaxq_s8(vout4x01234567_5x01234567, voutput_min); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
258 vout4x01234567_5x01234567 = vminq_s8(vout4x01234567_5x01234567, voutput_max); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
263 vst1_s8(c5 + 0, vget_high_s8(vout4x01234567_5x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
264 vst1_s8(c4 + 0, vget_low_s8(vout4x01234567_5x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
282 …_lane_u32(__builtin_assume_aligned(c5, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 2); c5… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
283 …_lane_u32(__builtin_assume_aligned(c4, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
288 vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 4); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
293 …_lane_u16(__builtin_assume_aligned(c5, 1), vreinterpretq_u16_s8(vout4x01234567_5x01234567), 4); c5… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
[all …]
8x8c4-minmax-neondot.c:288 … int8x16_t vout4x01234567_5x01234567 = vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc5x01234567); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local
302 …int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x012… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local
309 vout4x01234567_5x01234567 = vmaxq_s8(vout4x01234567_5x01234567, voutput_min); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
314 vout4x01234567_5x01234567 = vminq_s8(vout4x01234567_5x01234567, voutput_max); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
321 vst1_s8(c5 + 0, vget_high_s8(vout4x01234567_5x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
322 vst1_s8(c4 + 0, vget_low_s8(vout4x01234567_5x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
344 …_lane_u32(__builtin_assume_aligned(c5, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 2); c5… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
345 …_lane_u32(__builtin_assume_aligned(c4, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
351 vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 4); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
358 …_lane_u16(__builtin_assume_aligned(c5, 1), vreinterpretq_u16_s8(vout4x01234567_5x01234567), 4); c5… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
[all …]
6x16c4-minmax-neondot.c:397 …int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vget_low_s8(vout4x0123456789ABCDEF), vget_low_s8… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local
399 vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567)); c5 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
400 vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567)); c4 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
405 vout4x01234567_5x01234567 = vcombine_s8(vget_high_s8(vout4x0123456789ABCDEF), vget_high_s8(vout5x0… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
410 …_lane_u32(__builtin_assume_aligned(c5, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 2); c5… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
411 …_lane_u32(__builtin_assume_aligned(c4, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
416 vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 4); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
421 …_lane_u16(__builtin_assume_aligned(c5, 1), vreinterpretq_u16_s8(vout4x01234567_5x01234567), 4); c5… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
422 …_lane_u16(__builtin_assume_aligned(c4, 1), vreinterpretq_u16_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
427 vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 2); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
[all …]
8x16c4-minmax-neondot.c:493 …int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vget_low_s8(vout4x0123456789ABCDEF), vget_low_s8… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local
498 vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567)); c5 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
499 vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567)); c4 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
505 vout4x01234567_5x01234567 = vcombine_s8(vget_high_s8(vout4x0123456789ABCDEF), vget_high_s8(vout5x0… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
512 …_lane_u32(__builtin_assume_aligned(c5, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 2); c5… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
513 …_lane_u32(__builtin_assume_aligned(c4, 1), vreinterpretq_u32_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
519 vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 4); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
526 …_lane_u16(__builtin_assume_aligned(c5, 1), vreinterpretq_u16_s8(vout4x01234567_5x01234567), 4); c5… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
527 …_lane_u16(__builtin_assume_aligned(c4, 1), vreinterpretq_u16_s8(vout4x01234567_5x01234567), 0); c4… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
533 vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 2); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
[all …]
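
The remaining hits in both the qs8-gemm and qs8-igemm files (the _lane_u32 and _lane_u16 stores plus the vextq_s8 shifts) are the partial-store tail taken when fewer than 8 output columns remain for these rows: 4 bytes per row go out through a reinterpreted u32 lane, the vector is rotated down by 4 bytes, then 2 bytes go out through a u16 lane, then a final byte. The sketch below reproduces that tail under the same register layout; the helper name and the nc parameter are illustrative, the single-byte step is inferred from the kernel family rather than visible in the snippets above, and __builtin_assume_aligned mirrors the GCC/Clang idiom the generated code uses for byte-aligned pointers.

#include <arm_neon.h>
#include <stddef.h>

/* Sketch, not XNNPACK source: tail store for rows 4 (low half) and 5 (high half)
 * when nc < 8 output columns are left. */
static void store_tail_two_rows(
    int8x16_t vout4x01234567_5x01234567,
    int8_t* c4, int8_t* c5, size_t nc)
{
  if (nc & 4) {
    /* 4 bytes per row: u32 lane 0 is bytes 0-3 (row 4), lane 2 is bytes 8-11 (row 5). */
    vst1q_lane_u32(__builtin_assume_aligned(c4, 1),
        vreinterpretq_u32_s8(vout4x01234567_5x01234567), 0); c4 += 4;
    vst1q_lane_u32(__builtin_assume_aligned(c5, 1),
        vreinterpretq_u32_s8(vout4x01234567_5x01234567), 2); c5 += 4;
    /* Rotate the stored 4 bytes out of each half. */
    vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 4);
  }
  if (nc & 2) {
    /* 2 bytes per row: u16 lane 0 (row 4) and lane 4 (row 5). */
    vst1q_lane_u16(__builtin_assume_aligned(c4, 1),
        vreinterpretq_u16_s8(vout4x01234567_5x01234567), 0); c4 += 2;
    vst1q_lane_u16(__builtin_assume_aligned(c5, 1),
        vreinterpretq_u16_s8(vout4x01234567_5x01234567), 4); c5 += 2;
    vout4x01234567_5x01234567 = vextq_s8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 2);
  }
  if (nc & 1) {
    /* Last byte of each row: byte lane 0 (row 4) and lane 8 (row 5). */
    vst1q_lane_s8(c4, vout4x01234567_5x01234567, 0);
    vst1q_lane_s8(c5, vout4x01234567_5x01234567, 8);
  }
}
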
/external/XNNPACK/src/qu8-gemm/
8x8-minmax-neon.c:518 …uint8x16_t vout4x01234567_5x01234567 = vqmovun_high_s16(vqmovun_s16(vacc4x01234567), vacc5x0123456… in xnn_qu8_gemm_minmax_ukernel_8x8__neon() local
540 …uint8x16_t vout4x01234567_5x01234567 = vcombine_u8(vqmovun_s16(vacc4x01234567), vqmovun_s16(vacc5x… in xnn_qu8_gemm_minmax_ukernel_8x8__neon() local
548 vout4x01234567_5x01234567 = vmaxq_u8(vout4x01234567_5x01234567, voutput_min); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
552 vout4x01234567_5x01234567 = vminq_u8(vout4x01234567_5x01234567, voutput_max); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
560 … vst1_u8(c4, vget_low_u8(vout4x01234567_5x01234567)); c4 = (uint8_t*) ((uintptr_t) c4 + cn_stride); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
561 …vst1_u8(c5, vget_high_u8(vout4x01234567_5x01234567)); c5 = (uint8_t*) ((uintptr_t) c5 + cn_stride); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
581 …_lane_u32(__builtin_assume_aligned(c4, 1), vreinterpretq_u32_u8(vout4x01234567_5x01234567), 0); c4… in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
582 …_lane_u32(__builtin_assume_aligned(c5, 1), vreinterpretq_u32_u8(vout4x01234567_5x01234567), 2); c5… in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
587 vout4x01234567_5x01234567 = vextq_u8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 4); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
595 …_lane_u16(__builtin_assume_aligned(c4, 1), vreinterpretq_u16_u8(vout4x01234567_5x01234567), 0); c4… in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
[all …]
/external/XNNPACK/src/qu8-igemm/
8x8-minmax-neon.c:563 …uint8x16_t vout4x01234567_5x01234567 = vqmovun_high_s16(vqmovun_s16(vacc4x01234567), vacc5x0123456… in xnn_qu8_igemm_minmax_ukernel_8x8__neon() local
585 …uint8x16_t vout4x01234567_5x01234567 = vcombine_u8(vqmovun_s16(vacc4x01234567), vqmovun_s16(vacc5x… in xnn_qu8_igemm_minmax_ukernel_8x8__neon() local
593 vout4x01234567_5x01234567 = vmaxq_u8(vout4x01234567_5x01234567, voutput_min); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
597 vout4x01234567_5x01234567 = vminq_u8(vout4x01234567_5x01234567, voutput_max); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
603 vst1_u8(c5, vget_high_u8(vout4x01234567_5x01234567)); c5 += cn_stride; in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
604 vst1_u8(c4, vget_low_u8(vout4x01234567_5x01234567)); c4 += cn_stride; in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
617 …_lane_u32(__builtin_assume_aligned(c5, 1), vreinterpretq_u32_u8(vout4x01234567_5x01234567), 2); c5… in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
618 …_lane_u32(__builtin_assume_aligned(c4, 1), vreinterpretq_u32_u8(vout4x01234567_5x01234567), 0); c4… in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
624 vout4x01234567_5x01234567 = vextq_u8(vout4x01234567_5x01234567, vout4x01234567_5x01234567, 4); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
631 …_lane_u16(__builtin_assume_aligned(c5, 1), vreinterpretq_u16_u8(vout4x01234567_5x01234567), 4); c5… in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
[all …]
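
The qu8-gemm and qu8-igemm hits are the unsigned counterpart of the same scheme: the int16 accumulators are narrowed with unsigned saturation (vqmovun_high_s16 on AArch64, vqmovun_s16 plus vcombine_u8 otherwise), the uint8x16_t result is clamped with vmaxq_u8/vminq_u8, and the low and high halves are written to rows 4 and 5 with vst1_u8, followed by an analogous vreinterpretq_u32_u8/vreinterpretq_u16_u8 lane-store tail. A minimal sketch of the full-store path, with an illustrative name and signature:

#include <arm_neon.h>

/* Sketch, not XNNPACK source: unsigned (QU8) variant of the two-row
 * narrow/clamp/store for rows 4 and 5. */
static void narrow_clamp_store_two_rows_u8(
    int16x8_t vacc4x01234567, int16x8_t vacc5x01234567,
    uint8_t* c4, uint8_t* c5,
    uint8x16_t voutput_min, uint8x16_t voutput_max)
{
#if defined(__aarch64__)
  uint8x16_t vout4x01234567_5x01234567 =
      vqmovun_high_s16(vqmovun_s16(vacc4x01234567), vacc5x01234567);
#else
  uint8x16_t vout4x01234567_5x01234567 =
      vcombine_u8(vqmovun_s16(vacc4x01234567), vqmovun_s16(vacc5x01234567));
#endif
  vout4x01234567_5x01234567 = vmaxq_u8(vout4x01234567_5x01234567, voutput_min);
  vout4x01234567_5x01234567 = vminq_u8(vout4x01234567_5x01234567, voutput_max);
  vst1_u8(c4, vget_low_u8(vout4x01234567_5x01234567));   /* row 4 */
  vst1_u8(c5, vget_high_u8(vout4x01234567_5x01234567));  /* row 5 */
}
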