/external/XNNPACK/src/f32-ppmm/gen/

D | 4x2-minmax-scalar.c | in xnn_f32_ppmm_minmax_ukernel_4x2__scalar():
       54  float vacc3x0 = vacc0x0;                  (local)
       73  vacc3x0 += va3 * vb0;
       86  vacc3x0 = math_min_f32(vacc3x0, vmax);
       96  vacc3x0 = math_max_f32(vacc3x0, vmin);
      103  c3[0] = vacc3x0;
      122  *c3 = vacc3x0;

D | 4x4-minmax-scalar.c | in xnn_f32_ppmm_minmax_ukernel_4x4__scalar():
       60  float vacc3x0 = vacc0x0;                  (local)
       83  vacc3x0 += va3 * vb0;
      104  vacc3x0 = math_min_f32(vacc3x0, vmax);
      122  vacc3x0 = math_max_f32(vacc3x0, vmin);
      137  c3[0] = vacc3x0;
      164  c3[0] = vacc3x0;
      173  vacc3x0 = vacc3x2;
      184  *c3 = vacc3x0;
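Read top to bottom, these references trace the whole life cycle of the row-3 accumulator in the scalar PPMM kernels: it starts from the row-0 accumulator (all rows share the same per-column bias value), gains one multiply-accumulate per reduction step, is clamped to [vmin, vmax], and is stored. A minimal self-contained sketch of that pattern follows; the helper name, loop bounds, and the use of fminf/fmaxf in place of XNNPACK's math_min_f32/math_max_f32 are illustrative assumptions, not XNNPACK's actual code.

    #include <math.h>
    #include <stddef.h>

    /* Hypothetical sketch of the accumulate-and-clamp pattern above,
     * for one row (3) and one output column (0). */
    static void row3_col0_sketch(const float* a3, const float* b0, size_t kc,
                                 float bias, float vmin, float vmax, float* c3) {
      float vacc3x0 = bias;              /* cf. "float vacc3x0 = vacc0x0;" */
      for (size_t k = 0; k < kc; k++) {
        vacc3x0 += a3[k] * b0[k];        /* cf. "vacc3x0 += va3 * vb0;" */
      }
      vacc3x0 = fminf(vacc3x0, vmax);    /* cf. math_min_f32(vacc3x0, vmax) */
      vacc3x0 = fmaxf(vacc3x0, vmin);    /* cf. math_max_f32(vacc3x0, vmin) */
      c3[0] = vacc3x0;                   /* cf. "c3[0] = vacc3x0;" */
    }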
/external/XNNPACK/src/f32-spmm/gen/

D | 8x1-minmax-scalar.c | in xnn_f32_spmm_minmax_ukernel_8x1__scalar():
       44  float vacc3x0 = vacc0x0;                       (local)
       65  vacc3x0 += vi3 * vw0;
       75  float vout3x0 = math_min_f32(vacc3x0, vmax);
      185  float vacc3x0 = vacc0x0;                       (local)
      198  vacc3x0 += vi3 * vw0;
      204  float vout3x0 = math_min_f32(vacc3x0, vmax);

D | 8x2-minmax-scalar.c | in xnn_f32_spmm_minmax_ukernel_8x2__scalar():
       44  float vacc3x0 = vacc0x0;                       (local)
       74  vacc3x0 += vi3 * vw0;
       92  float vout3x0 = math_min_f32(vacc3x0, vmax);
      227  float vacc3x0 = vacc0x0;                       (local)
      245  vacc3x0 += vi3 * vw0;
      255  float vout3x0 = math_min_f32(vacc3x0, vmax);

D | 4x1-minmax-scalar.c | in xnn_f32_spmm_minmax_ukernel_4x1__scalar():
       44  float vacc3x0 = vacc0x0;                       (local)
       57  vacc3x0 += vi3 * vw0;
       63  float vout3x0 = math_min_f32(vacc3x0, vmax);

D | 8x4-minmax-scalar.c | in xnn_f32_spmm_minmax_ukernel_8x4__scalar():
       44  float vacc3x0 = vacc0x0;                       (local)
       92  vacc3x0 += vi3 * vw0;
      126  float vout3x0 = math_min_f32(vacc3x0, vmax);
      311  float vacc3x0 = vacc0x0;                       (local)
      339  vacc3x0 += vi3 * vw0;
      357  float vout3x0 = math_min_f32(vacc3x0, vmax);
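In the sparse (SPMM) kernels the accumulator is declared twice per file because the generated code has a main block and a remainder block. The update multiplies an input value vi3 by a nonzero weight vw0, and the clamped result flows through a separate vout3x0 temporary rather than being clamped in place. A hedged sketch of that sequence follows; it iterates dense arrays rather than the indexed sparse traversal the real kernels use, and every name beyond those in the listing, plus fminf/fmaxf standing in for math_min_f32/math_max_f32, is an assumption.

    #include <math.h>
    #include <stddef.h>

    /* Illustrative only: accumulate nnz nonzero weights for row 3,
     * column 0, then clamp into a separate output value, as the
     * vout3x0 lines above do. */
    static float spmm_row3_col0_sketch(const float* vi3, const float* vw0,
                                       size_t nnz, float bias,
                                       float vmin, float vmax) {
      float vacc3x0 = bias;                    /* cf. "vacc3x0 = vacc0x0" */
      for (size_t n = 0; n < nnz; n++) {
        vacc3x0 += vi3[n] * vw0[n];            /* cf. "vacc3x0 += vi3 * vw0" */
      }
      float vout3x0 = fminf(vacc3x0, vmax);    /* cf. math_min_f32(vacc3x0, vmax) */
      vout3x0 = fmaxf(vout3x0, vmin);          /* min clamp, then max clamp */
      return vout3x0;
    }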
/external/XNNPACK/src/qs8-igemm/gen/

D | 4x8c8-minmax-neon-mull-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal():
       83  int32x4_t vacc3x0 = vacc0x0;                     (local)
      129  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      213  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      268  const int32x2_t vpsum3x0 = vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));

D | 4x8c8-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal():
       83  int32x4_t vacc3x0 = vacc0x0;                     (local)
      145  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      256  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      340  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      395  const int32x2_t vpsum3x0 = vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));

D | 4x8c16-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal():
       83  int32x4_t vacc3x0 = vacc0x0;                     (local)
      140  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      245  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      300  const int32x2_t vpsum3x0 = vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));

D | 4x16c8-minmax-neon-mull-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal():
      107  int32x4_t vacc3x0 = vacc0x0;                     (local)
      161  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      329  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      438  const int32x2_t vpsum3x0 = vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));

D | 4x16c8-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal():
      107  int32x4_t vacc3x0 = vacc0x0;                     (local)
      185  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      400  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      568  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      677  const int32x2_t vpsum3x0 = vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));

D | 4x16c16-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal():
      107  int32x4_t vacc3x0 = vacc0x0;                     (local)
      180  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      393  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      502  const int32x2_t vpsum3x0 = vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));
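In the NEON QS8 kernels, vacc3x0 is a 4-lane 32-bit accumulator fed by widened 8-bit products: the products are produced as 16-bit values (which is why the mlal variants show two vpadalq_s16 lines, one per pass over the 16-bit products) and folded in with vpadalq_s16. A minimal sketch of one such step follows; the helper name, the argument shapes, and the use of a plain vmull_s8 with no second mlal pass are illustrative assumptions.

    #include <arm_neon.h>

    /* One c8 accumulation step for row 3, column group 0: widen the
     * 8-bit products to 16 bits, then pairwise add-accumulate into the
     * 32-bit lanes of vacc3x0. Illustrative only. */
    static inline int32x4_t qs8_acc3x0_step(int32x4_t vacc3x0,
                                            int8x8_t va3, int8x8_t vb0) {
      const int16x8_t vprod3x0 = vmull_s8(va3, vb0);  /* 8x8-bit -> 8x16-bit */
      return vpadalq_s16(vacc3x0, vprod3x0);          /* cf. the vpadalq_s16 lines */
    }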
/external/XNNPACK/src/qs8-gemm/gen/

D | 4x8c8-minmax-neon-mull-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal():
       86  int32x4_t vacc3x0 = vacc0x0;                     (local)
      112  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      193  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      248  const int32x2_t vpsum3x0 = vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));

D | 4x8c8-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal():
       86  int32x4_t vacc3x0 = vacc0x0;                     (local)
      128  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      239  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      320  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      375  const int32x2_t vpsum3x0 = vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));

D | 4x8c16-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal():
       86  int32x4_t vacc3x0 = vacc0x0;                     (local)
      123  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      225  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      280  const int32x2_t vpsum3x0 = vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));

D | 4x16c8-minmax-neon-mull-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal():
      110  int32x4_t vacc3x0 = vacc0x0;                     (local)
      144  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      309  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      418  const int32x2_t vpsum3x0 = vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));

D | 4x16c8-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal():
      110  int32x4_t vacc3x0 = vacc0x0;                     (local)
      168  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      383  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      548  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      657  const int32x2_t vpsum3x0 = vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));

D | 4x16c16-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal():
      110  int32x4_t vacc3x0 = vacc0x0;                     (local)
      163  vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0);
      373  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      482  const int32x2_t vpsum3x0 = vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));
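After accumulation, each kernel must collapse the four lanes of vacc3x0 into a single per-output sum. The vsum3x01 lines use the AArch64-only pairwise vpaddq_s32 across two accumulators, while the vpsum3x0 lines show the non-AArch64 path that adds the low and high halves with vadd_s32 before a final vpadd_s32. The sketch below combines both strategies into one helper that reduces all the way to the two final lane sums; the function name and the reduction being carried to completion in one call (the real generated code interleaves it across four accumulators) are assumptions.

    #include <arm_neon.h>

    /* Reduce two 4-lane accumulators to [sum(vacc3x0), sum(vacc3x1)],
     * mirroring the vsum3x01 / vpsum3x0 reference lines above.
     * Illustrative only. */
    static inline int32x2_t qs8_sum3x01(int32x4_t vacc3x0, int32x4_t vacc3x1) {
    #if defined(__aarch64__)
      /* Pairwise add across both accumulators, then across halves. */
      const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
      return vpadd_s32(vget_low_s32(vsum3x01), vget_high_s32(vsum3x01));
    #else
      /* Fallback: fold each accumulator's halves, then pair them up. */
      const int32x2_t vpsum3x0 =
          vadd_s32(vget_low_s32(vacc3x0), vget_high_s32(vacc3x0));
      const int32x2_t vpsum3x1 =
          vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));
      return vpadd_s32(vpsum3x0, vpsum3x1);
    #endif
    }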