/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c | 50 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 95 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() 128 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() 140 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() 153 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mull-padal.c | 50 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal() local 79 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal() 91 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal() 104 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal()
|
D | 1x8c16-minmax-neon-mlal-padal.c | 50 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() local 87 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 99 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 112 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mull-padal.c | 56 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local 64 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() 107 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() 122 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() 141 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c | 56 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 64 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 131 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 182 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 197 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 216 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c | 56 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local 64 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 122 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 138 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 157 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mull-padal.c | 62 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local 70 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 78 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 135 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 153 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 178 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 62 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 70 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 78 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 167 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 236 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 254 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 279 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mull-padal.c | 50 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local 87 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() 123 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() 142 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 4x8c8-minmax-neon-mull-padal.c | 68 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local 76 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 84 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 92 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 163 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 184 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 215 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 62 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 70 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 78 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 157 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 177 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 202 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 50 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 111 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 176 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 212 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 231 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 68 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 76 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 84 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 92 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 203 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 290 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 311 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 342 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c8-minmax-neon-mull-padal.c | 53 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal() local 90 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal() 105 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal() 118 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal()
|
D | 1x8c8-minmax-neon-mlal-padal.c | 53 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 106 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() 139 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() 154 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() 167 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x8c16-minmax-neon-mlal-padal.c | 53 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() local 98 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 113 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 126 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mull-padal.c | 57 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local 65 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() 120 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() 138 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() 157 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c | 57 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 65 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 144 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 195 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 213 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 232 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c | 57 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local 65 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 135 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 154 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 173 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mull-padal.c | 61 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local 69 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 77 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 150 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 171 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 196 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
|
D | 4x8c8-minmax-neon-mull-padal.c | 65 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local 73 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 81 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 89 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 180 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 204 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 235 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 1x16c8-minmax-neon-mull-padal.c | 53 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local 98 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() 137 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() 156 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 53 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 122 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 187 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 226 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 245 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 61 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 69 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 77 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 172 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 195 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 220 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 61 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 69 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 77 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 182 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 251 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 272 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 297 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|