/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x16c8-minmax-neon-mull-padal.c | 52 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local 93 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() 124 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() 147 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 52 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 119 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 182 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 213 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 236 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 52 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 109 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 140 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 163 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 58 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 74 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 133 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 179 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 214 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 58 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 74 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 169 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 272 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 318 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 353 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 58 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 74 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 158 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 211 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 246 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 64 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 80 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 96 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 173 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 234 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 281 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 64 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 80 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 96 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 219 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 362 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 423 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 470 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 70 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 86 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 102 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 118 int32x4_t vacc3x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 213 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 289 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 348 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 64 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 80 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 96 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 207 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 282 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 329 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 70 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 86 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 102 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 118 int32x4_t vacc3x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 256 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 353 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 412 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 70 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 86 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 102 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 118 int32x4_t vacc3x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 269 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 452 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 528 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 587 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x16c8-minmax-neon-mull-padal.c | 55 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local 104 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() 138 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() 161 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 55 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 130 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 193 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 227 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 250 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 55 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 120 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 154 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 177 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 59 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 75 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 146 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 195 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 230 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 59 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 75 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 182 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 285 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 334 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 369 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 63 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 79 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 95 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 188 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 252 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 299 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 59 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 75 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 171 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 227 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 262 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 67 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 83 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 99 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 115 int32x4_t vacc3x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 230 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 309 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 368 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 63 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 79 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 95 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 222 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 300 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 347 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 63 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 79 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 95 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 234 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 377 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 441 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 488 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 67 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 83 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 99 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 115 int32x4_t vacc3x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 273 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 373 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 432 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 67 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 83 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 99 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 115 int32x4_t vacc3x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 286 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 469 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 548 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 607 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|