/external/XNNPACK/src/qs8-gemm/gen/

1x8c8-minmax-neon-mlal-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal)
   49:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   91:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  125:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  139:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  152:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

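Every file in this listing builds its int32 accumulators the same way: an 8-bit widening multiply yields int16 products, vpadalq_s16 folds those products pairwise into a 4-lane int32 accumulator, and a final horizontal add collapses the lanes into one dot product per output column. A minimal standalone sketch of that idiom, with toy buffers in place of the kernels' real input and packed-weight streams:

    #include <arm_neon.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      // Toy 8-element dot product for one output column.
      const int8_t a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      const int8_t b[8] = {1, 1, 1, 1, 1, 1, 1, 1};
      const int8x8_t va0 = vld1_s8(a);
      const int8x8_t vb5 = vld1_s8(b);

      // Widening multiply: eight int8 pairs -> eight int16 products.
      const int16x8_t vprod0x5 = vmull_s8(va0, vb5);

      // Pairwise add-accumulate the int16 products into four int32 lanes
      // (the real kernels first load the column bias into lane 0).
      int32x4_t vacc0x5 = vmovq_n_s32(0);
      vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);

      // Horizontal reduction: four lanes down to one int32 sum.
      const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));
      const int32_t vsum = vget_lane_s32(vpadd_s32(vpsum0x5, vpsum0x5), 0);
      printf("dot = %d\n", vsum);  // 1+2+...+8 = 36
      return 0;
    }

The 4-lane accumulator is why the last hits in each file below are reduction steps rather than stores.
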
1x8c8-minmax-neon-mull-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal)
   49:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   76:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
   90:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  103:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

1x8c16-minmax-neon-mlal-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal)
   49:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   84:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
   98:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  111:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

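The c16 variants consume 16 bytes of K per column group per step instead of 8, which is consistent with the single vpadalq_s16 hit in their main loop. A sketch of one such step, assuming va0 and vb5 hold 16 input and weight bytes (names are illustrative, not copied from the sources):

    #include <arm_neon.h>

    // One c16 step for one output column: multiply the low halves, fuse the
    // high halves with vmlal_s8, then fold all eight int16 products into the
    // int32 accumulator with a single vpadalq_s16.
    static int32x4_t c16_step(int32x4_t vacc0x5, int8x16_t va0, int8x16_t vb5) {
      int16x8_t vprod0x5 = vmull_s8(vget_low_s8(va0), vget_low_s8(vb5));
      vprod0x5 = vmlal_s8(vprod0x5, vget_high_s8(va0), vget_high_s8(vb5));
      return vpadalq_s16(vacc0x5, vprod0x5);
    }
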
2x8c8-minmax-neon-mull-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal)
   55:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   63:  int32x4_t vacc1x5 = vacc0x5;
  102:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  121:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  140:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

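In the multi-row kernels the bias for each column is loaded once into lane 0 of a zeroed vector for row 0, and the remaining rows start from copies of it; that is what the vacc1x5 = vacc0x5 (and, further down, vacc2x5/vacc3x5) hits are. A sketch with a hypothetical packed-weights pointer w:

    #include <arm_neon.h>
    #include <stdint.h>

    // The bias for column 5 is read once into lane 0 of a zeroed vector for
    // row 0; every further row's accumulator starts as a copy of it.
    static void init_acc5(const int32_t* w, int32x4_t* vacc0x5, int32x4_t* vacc1x5) {
      *vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0);
      *vacc1x5 = *vacc0x5;  // rows 2 and 3 would be copied the same way
    }
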
2x8c8-minmax-neon-mlal-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal)
   55:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   63:  int32x4_t vacc1x5 = vacc0x5;
  124:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  177:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  196:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  215:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

2x8c16-minmax-neon-mlal-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal)
   55:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   63:  int32x4_t vacc1x5 = vacc0x5;
  116:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  137:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  156:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

3x8c8-minmax-neon-mull-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal)
   61:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   69:  int32x4_t vacc1x5 = vacc0x5;
   77:  int32x4_t vacc2x5 = vacc0x5;
  128:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  152:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  177:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

3x8c8-minmax-neon-mlal-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal)
   61:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   69:  int32x4_t vacc1x5 = vacc0x5;
   77:  int32x4_t vacc2x5 = vacc0x5;
  157:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  229:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  253:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  278:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

1x16c8-minmax-neon-mull-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal)
   49:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   84:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  122:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  141:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

4x8c8-minmax-neon-mull-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal)
   67:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   75:  int32x4_t vacc1x5 = vacc0x5;
   83:  int32x4_t vacc2x5 = vacc0x5;
   91:  int32x4_t vacc3x5 = vacc0x5;
  154:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  183:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  214:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

3x8c16-minmax-neon-mlal-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal)
   61:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   69:  int32x4_t vacc1x5 = vacc0x5;
   77:  int32x4_t vacc2x5 = vacc0x5;
  148:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  176:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  201:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

1x16c8-minmax-neon-mlal-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal)
   49:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
  107:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  173:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  211:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  230:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

4x8c8-minmax-neon-mlal-padal.c  (matches in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal)
   67:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   75:  int32x4_t vacc1x5 = vacc0x5;
   83:  int32x4_t vacc2x5 = vacc0x5;
   91:  int32x4_t vacc3x5 = vacc0x5;
  190:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  281:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  310:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  341:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

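The mull and mlal variants above differ only in the inner product step: mlal fuses two consecutive 8-byte k-groups into one int16 product vector before accumulating, so a single vpadalq_s16 retires 16 bytes of K; the second vpadalq_s16 hit in each mlal file is presumably the mull-only remainder path for a trailing 8-byte group. A sketch of the fused step, with hypothetical operand names:

    #include <arm_neon.h>

    // Fused "mlal" inner step: two consecutive 8-byte k-groups per column
    // are multiplied back to back, with the second product folded into the
    // first in int16 before the single pairwise add-accumulate.
    static int32x4_t mlal_step(int32x4_t vacc0x5,
                               int8x8_t va0c0, int8x8_t va0c1,
                               int8x8_t vb5c0, int8x8_t vb5c1) {
      int16x8_t vprod0x5 = vmull_s8(va0c0, vb5c0);
      vprod0x5 = vmlal_s8(vprod0x5, va0c1, vb5c1);
      return vpadalq_s16(vacc0x5, vprod0x5);
    }

Only two 8-bit products are summed before widening, which keeps the int16 intermediate within range for the operand ranges these kernels assume.
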
/external/XNNPACK/src/qs8-igemm/gen/

1x8c8-minmax-neon-mull-padal.c  (matches in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal)
   52:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   87:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  104:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  117:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

1x8c8-minmax-neon-mlal-padal.c  (matches in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal)
   52:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
  102:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  136:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  153:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  166:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

1x8c16-minmax-neon-mlal-padal.c  (matches in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal)
   52:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   95:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  112:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  125:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

2x8c8-minmax-neon-mull-padal.c  (matches in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal)
   56:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   64:  int32x4_t vacc1x5 = vacc0x5;
  115:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  137:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  156:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

2x8c8-minmax-neon-mlal-padal.c  (matches in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal)
   56:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   64:  int32x4_t vacc1x5 = vacc0x5;
  137:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  190:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  212:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  231:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

2x8c16-minmax-neon-mlal-padal.c  (matches in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal)
   56:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   64:  int32x4_t vacc1x5 = vacc0x5;
  129:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  153:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  172:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

3x8c8-minmax-neon-mull-padal.c  (matches in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal)
   60:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   68:  int32x4_t vacc1x5 = vacc0x5;
   76:  int32x4_t vacc2x5 = vacc0x5;
  143:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  170:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  195:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

4x8c8-minmax-neon-mull-padal.c  (matches in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal)
   64:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   72:  int32x4_t vacc1x5 = vacc0x5;
   80:  int32x4_t vacc2x5 = vacc0x5;
   88:  int32x4_t vacc3x5 = vacc0x5;
  171:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  203:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  234:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

1x16c8-minmax-neon-mull-padal.c  (matches in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal)
   52:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   95:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  136:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  155:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

1x16c8-minmax-neon-mlal-padal.c  (matches in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal)
   52:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
  118:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  184:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  225:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  244:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

3x8c16-minmax-neon-mlal-padal.c  (matches in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal)
   60:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   68:  int32x4_t vacc1x5 = vacc0x5;
   76:  int32x4_t vacc2x5 = vacc0x5;
  163:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  194:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  219:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

3x8c8-minmax-neon-mlal-padal.c  (matches in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal)
   60:  …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  [local definition]
   68:  int32x4_t vacc1x5 = vacc0x5;
   76:  int32x4_t vacc2x5 = vacc0x5;
  172:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  244:  vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5);
  271:  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
  296:  const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));

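In every file the last two hits are the tail of the horizontal reduction that turns the per-column int32x4 accumulators into one lane per output element: vpaddq_s32 pairs two whole accumulators at once and is an AArch64-only intrinsic, while the vget_low/vget_high + vadd_s32 form is the AArch32 fallback, so the two presumably sit on opposite sides of an architecture #if in the generated sources. A sketch of both paths:

    #include <arm_neon.h>

    #if defined(__aarch64__)
    // AArch64: vpaddq_s32 pairwise-adds across two accumulators at once,
    // producing {4a+4b, 4c+4d, 5a+5b, 5c+5d} toward columns 4 and 5.
    static int32x4_t reduce_pair(int32x4_t vacc0x4, int32x4_t vacc0x5) {
      return vpaddq_s32(vacc0x4, vacc0x5);
    }
    #else
    // AArch32: fold the high half onto the low half, then pairwise-add the
    // remaining two lanes; both lanes of the result hold the full sum.
    static int32x2_t reduce_one(int32x4_t vacc0x5) {
      const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5));
      return vpadd_s32(vpsum0x5, vpsum0x5);
    }
    #endif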