/external/XNNPACK/src/qu8-gemm/gen/ |
D | 1x8c4-minmax-rndnu-neondot.c | 49 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot() local 68 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot() 70 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot() 86 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot() 93 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot()
|
D | 2x8c4-minmax-rndnu-neondot.c | 55 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x8c4__neondot() local 57 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_2x8c4__neondot() 78 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x8c4__neondot() 80 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x8c4__neondot() 102 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x8c4__neondot() 112 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x8c4__neondot()
|
D | 1x16c4-minmax-rndnu-neondot.c | 49 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot() local 74 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot() 78 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot() 98 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot() 107 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot()
|
D | 1x16c4-minmax-fp32-neondot.c | 50 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot() local 75 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot() 79 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot() 99 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot() 108 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot()
|
D | 3x8c4-minmax-rndnu-neondot.c | 61 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x8c4__neondot() local 63 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_3x8c4__neondot() 65 uint32x4_t vpacc2x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_3x8c4__neondot() 88 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x8c4__neondot() 90 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x8c4__neondot() 118 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x8c4__neondot() 131 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x8c4__neondot()
|
D | 4x8c4-minmax-rndnu-neondot.c | 67 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__neondot() local 69 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__neondot() 71 uint32x4_t vpacc2x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__neondot() 73 uint32x4_t vpacc3x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__neondot() 98 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__neondot() 100 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__neondot() 134 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__neondot() 150 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__neondot()
|
D | 5x8c4-minmax-rndnu-neondot.c | 73 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x8c4__neondot() local 75 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_5x8c4__neondot() 77 uint32x4_t vpacc2x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_5x8c4__neondot() 79 uint32x4_t vpacc3x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_5x8c4__neondot() 81 uint32x4_t vpacc4x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_5x8c4__neondot() 108 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x8c4__neondot() 110 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x8c4__neondot() 150 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x8c4__neondot() 169 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x8c4__neondot()
|
D | 2x16c4-minmax-fp32-neondot.c | 56 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_gemm_minmax_fp32_ukernel_2x16c4__neondot() local 60 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_fp32_ukernel_2x16c4__neondot() 87 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_2x16c4__neondot() 91 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_gemm_minmax_fp32_ukernel_2x16c4__neondot() 121 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_2x16c4__neondot() 135 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_2x16c4__neondot()
|
D | 2x16c4-minmax-rndnu-neondot.c | 55 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot() local 59 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot() 86 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot() 90 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot() 120 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot() 134 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot()
|
D | 6x8c4-minmax-rndnu-neondot.c | 79 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot() local 81 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot() 83 uint32x4_t vpacc2x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot() 85 uint32x4_t vpacc3x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot() 87 uint32x4_t vpacc4x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot() 89 uint32x4_t vpacc5x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot() 118 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot() 120 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot() 166 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot() 188 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot()
|
D | 8x8c4-minmax-rndnu-neondot.c | 91 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot() local 93 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot() 95 uint32x4_t vpacc2x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot() 97 uint32x4_t vpacc3x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot() 99 uint32x4_t vpacc4x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot() 101 uint32x4_t vpacc5x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot() 103 uint32x4_t vpacc6x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot() 105 uint32x4_t vpacc7x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot() 138 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot() 140 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot() [all …]
|
D | 3x16c4-minmax-rndnu-neondot.c | 61 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() local 65 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 69 uint32x4_t vpacc2x0123 = vpacc0x0123; in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 98 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 102 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 142 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() 161 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot()
|
/external/XNNPACK/src/qu8-igemm/gen/ |
D | 1x8c4-minmax-rndnu-neondot.c | 51 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot() local 78 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot() 80 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot() 96 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot() 105 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot()
|
D | 2x8c4-minmax-rndnu-neondot.c | 55 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot() local 57 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot() 90 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot() 92 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot() 114 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot() 126 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot()
|
D | 1x16c4-minmax-fp32-neondot.c | 52 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot() local 85 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot() 89 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot() 109 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot() 120 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot()
|
D | 3x8c4-minmax-rndnu-neondot.c | 59 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot() local 61 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot() 63 uint32x4_t vpacc2x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot() 102 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot() 104 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot() 132 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot() 147 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot()
|
D | 1x16c4-minmax-rndnu-neondot.c | 51 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot() local 84 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot() 88 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot() 108 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot() 119 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot()
|
D | 4x8c4-minmax-rndnu-neondot.c | 63 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot() local 65 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot() 67 uint32x4_t vpacc2x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot() 69 uint32x4_t vpacc3x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot() 114 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot() 116 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot() 150 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot() 168 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot()
|
D | 5x8c4-minmax-rndnu-neondot.c | 67 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot() local 69 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot() 71 uint32x4_t vpacc2x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot() 73 uint32x4_t vpacc3x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot() 75 uint32x4_t vpacc4x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot() 126 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot() 128 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot() 168 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot() 189 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot()
|
D | 2x16c4-minmax-fp32-neondot.c | 56 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() local 60 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() 99 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() 103 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() 133 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() 149 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot()
|
D | 6x8c4-minmax-rndnu-neondot.c | 71 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot() local 73 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot() 75 uint32x4_t vpacc2x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot() 77 uint32x4_t vpacc3x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot() 79 uint32x4_t vpacc4x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot() 81 uint32x4_t vpacc5x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot() 138 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot() 140 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot() 186 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot() 210 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot()
|
D | 2x16c4-minmax-rndnu-neondot.c | 55 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot() local 59 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot() 98 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot() 102 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot() 132 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot() 148 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot()
|
D | 8x8c4-minmax-rndnu-neondot.c | 79 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot() local 81 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot() 83 uint32x4_t vpacc2x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot() 85 uint32x4_t vpacc3x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot() 87 uint32x4_t vpacc4x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot() 89 uint32x4_t vpacc5x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot() 91 uint32x4_t vpacc6x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot() 93 uint32x4_t vpacc7x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot() 162 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot() 164 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot() [all …]
|
D | 3x16c4-minmax-rndnu-neondot.c | 59 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() local 63 uint32x4_t vpacc1x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 67 uint32x4_t vpacc2x0123 = vpacc0x0123; in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 112 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 116 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 156 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() 177 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot()
|
D | 1x32c4-minmax-rndnu-neondot.c | 51 uint32x4_t vpacc0x0123 = vld1q_u32(w); w = (const void*) ((const uint32_t*) w + 4); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot() local 96 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot() 104 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot() 132 vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot() 147 int32x4_t vacc0x0123 = vreinterpretq_s32_u32(vsubq_u32(vpacc0x0123, vnacc0x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot()
|