Cross-reference matches for the accumulator vacc3x1 in XNNPACK's generated micro-kernels; each match is shown with its source line number, and declaration sites are tagged (local).

/external/XNNPACK/src/f32-ppmm/gen/

D | 4x2-minmax-scalar.c | in xnn_f32_ppmm_minmax_ukernel_4x2__scalar():
     55  float vacc3x1 = vacc0x1;  (local)
     77  vacc3x1 += va3 * vb1;
     90  vacc3x1 = math_min_f32(vacc3x1, vmax);
    100  vacc3x1 = math_max_f32(vacc3x1, vmin);
    104  c3[1] = vacc3x1;

D | 4x4-minmax-scalar.c | in xnn_f32_ppmm_minmax_ukernel_4x4__scalar():
     61  float vacc3x1 = vacc0x1;  (local)
     87  vacc3x1 += va3 * vb1;
    108  vacc3x1 = math_min_f32(vacc3x1, vmax);
    126  vacc3x1 = math_max_f32(vacc3x1, vmin);
    138  c3[1] = vacc3x1;
    165  c3[1] = vacc3x1;
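Read top to bottom, these matches trace one accumulator through the whole PPMM micro-kernel: the row-3 accumulator starts as a copy of the bias-seeded row-0 accumulator, takes one multiply-add per reduction step, is clamped (min before max), and is stored. Below is a minimal plain-C sketch of that pattern; it is a hypothetical 2x2 analogue with an assumed packed-operand layout (ppmm_2x2_sketch and its parameters are illustrative, not the generated 4x2/4x4 kernels).

    #include <stddef.h>

    static inline float math_min_f32(float a, float b) { return a < b ? a : b; }
    static inline float math_max_f32(float a, float b) { return a > b ? a : b; }

    /* Hypothetical 2x2 analogue of the 4x2 PPMM micro-kernel. w packs
     * [bias | B panel], a is the pre-packed A panel, c0/c1 are two
     * output rows. */
    void ppmm_2x2_sketch(size_t kc, const float* a, const float* w,
                         float* c0, float* c1, float vmin, float vmax) {
      /* Row-0 accumulators are seeded with the bias; row 1 starts as a
       * copy, matching "float vacc3x1 = vacc0x1". */
      float vacc0x0 = w[0];
      float vacc0x1 = w[1];
      float vacc1x0 = vacc0x0;
      float vacc1x1 = vacc0x1;
      w += 2;
      for (size_t k = 0; k < kc; k++) {
        const float va0 = a[0];
        const float va1 = a[1];  /* MR values of packed A per step */
        a += 2;
        const float vb0 = w[0];
        const float vb1 = w[1];  /* NR values of packed B per step */
        w += 2;
        vacc0x0 += va0 * vb0;
        vacc0x1 += va0 * vb1;
        vacc1x0 += va1 * vb0;
        vacc1x1 += va1 * vb1;   /* cf. "vacc3x1 += va3 * vb1" */
      }
      /* Clamp with min first, then max, in the order the listing shows. */
      vacc0x0 = math_max_f32(math_min_f32(vacc0x0, vmax), vmin);
      vacc0x1 = math_max_f32(math_min_f32(vacc0x1, vmax), vmin);
      vacc1x0 = math_max_f32(math_min_f32(vacc1x0, vmax), vmin);
      vacc1x1 = math_max_f32(math_min_f32(vacc1x1, vmax), vmin);
      c0[0] = vacc0x0;
      c0[1] = vacc0x1;
      c1[0] = vacc1x0;
      c1[1] = vacc1x1;   /* cf. "c3[1] = vacc3x1" */
    }

The 4x4 file stores c3[1] twice (lines 138 and 165), which fits a main store path plus a column-remainder path in the generated code.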
/external/XNNPACK/src/f32-spmm/gen/

D | 8x2-minmax-scalar.c | in xnn_f32_spmm_minmax_ukernel_8x2__scalar():
     52  float vacc3x1 = vacc0x1;  (local)
     82  vacc3x1 += vi3 * vw1;
    100  float vout3x1 = math_min_f32(vacc3x1, vmax);
    231  float vacc3x1 = vacc0x1;  (local)
    249  vacc3x1 += vi3 * vw1;
    259  float vout3x1 = math_min_f32(vacc3x1, vmax);

D | 8x4-minmax-scalar.c | in xnn_f32_spmm_minmax_ukernel_8x4__scalar():
     52  float vacc3x1 = vacc0x1;  (local)
    100  vacc3x1 += vi3 * vw1;
    134  float vout3x1 = math_min_f32(vacc3x1, vmax);
    315  float vacc3x1 = vacc0x1;  (local)
    343  vacc3x1 += vi3 * vw1;
    361  float vout3x1 = math_min_f32(vacc3x1, vmax);
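Two things stand out against the PPMM matches: the accumulator feeds a fresh vout local instead of being clamped in place, and the whole match set repeats at higher line numbers (52 vs 231, and so on), which is consistent with the generated kernel duplicating its body for a smaller-batch remainder path. A plain-C sketch of the sparse accumulation pattern follows; spmm_2x2_sketch, dmap, nnzmap, and the weights layout are all illustrative assumptions, not XNNPACK's actual packing.

    #include <stddef.h>
    #include <stdint.h>

    static inline float math_min_f32(float a, float b) { return a < b ? a : b; }
    static inline float math_max_f32(float a, float b) { return a > b ? a : b; }

    /* Hypothetical 2-batch x 2-channel analogue of the 8x2 SPMM kernel.
     * weights packs, per channel pair, [bias0, bias1] followed by
     * [w0, w1] for each nonzero; dmap holds a byte diff to the input of
     * the next nonzero (assumed to chain correctly across channel
     * pairs); nnzmap holds the nonzero count per channel pair. */
    void spmm_2x2_sketch(size_t output_channels, const float* input,
                         const float* weights, const int32_t* dmap,
                         const uint32_t* nnzmap, float* output,
                         size_t batch, float vmin, float vmax) {
      for (size_t n = 0; n < output_channels; n += 2) {
        /* Bias-seeded channel accumulators; batch rows past row 0 start
         * as copies, matching "float vacc3x1 = vacc0x1". */
        float vacc0x0 = weights[0];
        float vacc0x1 = weights[1];
        float vacc1x0 = vacc0x0;
        float vacc1x1 = vacc0x1;
        weights += 2;
        for (uint32_t nnz = *nnzmap++; nnz != 0; nnz--) {
          const float vi0 = input[0];
          const float vi1 = input[1];
          const intptr_t diff = (intptr_t) *dmap++;
          input = (const float*) ((uintptr_t) input + (uintptr_t) diff);
          const float vw0 = *weights++;
          const float vw1 = *weights++;
          vacc0x0 += vi0 * vw0;
          vacc0x1 += vi0 * vw1;
          vacc1x0 += vi1 * vw0;
          vacc1x1 += vi1 * vw1;   /* cf. "vacc3x1 += vi3 * vw1" */
        }
        /* Clamp into fresh vout locals (min first), matching
         * "float vout3x1 = math_min_f32(vacc3x1, vmax)". */
        const float vout0x0 = math_max_f32(math_min_f32(vacc0x0, vmax), vmin);
        const float vout0x1 = math_max_f32(math_min_f32(vacc0x1, vmax), vmin);
        const float vout1x0 = math_max_f32(math_min_f32(vacc1x0, vmax), vmin);
        const float vout1x1 = math_max_f32(math_min_f32(vacc1x1, vmax), vmin);
        output[0]         = vout0x0;  /* channel n,   batch 0 */
        output[1]         = vout1x0;  /* channel n,   batch 1 */
        output[batch]     = vout0x1;  /* channel n+1, batch 0 */
        output[batch + 1] = vout1x1;  /* channel n+1, batch 1 */
        output += 2 * batch;
      }
    }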
/external/XNNPACK/src/qs8-igemm/gen/

D | 4x8c8-minmax-neon-mull-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal():
     84  int32x4_t vacc3x1 = vacc0x1;  (local)
    138  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    213  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
    269  const int32x2_t vpsum3x1 = vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));

D | 4x8c8-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal():
     84  int32x4_t vacc3x1 = vacc0x1;  (local)
    158  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    265  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    340  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
    396  const int32x2_t vpsum3x1 = vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));

D | 4x8c16-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal():
     84  int32x4_t vacc3x1 = vacc0x1;  (local)
    152  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    245  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
    301  const int32x2_t vpsum3x1 = vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));

D | 4x16c8-minmax-neon-mull-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal():
    108  int32x4_t vacc3x1 = vacc0x1;  (local)
    170  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    329  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
    439  const int32x2_t vpsum3x1 = vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));

D | 4x16c8-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal():
    108  int32x4_t vacc3x1 = vacc0x1;  (local)
    198  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    409  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    568  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
    678  const int32x2_t vpsum3x1 = vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));

D | 4x16c16-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal():
    108  int32x4_t vacc3x1 = vacc0x1;  (local)
    192  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    393  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
    503  const int32x2_t vpsum3x1 = vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));
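Across all six files the accumulator follows the same c8/c16 lifecycle: a bias-seeded init copied across rows, int8 products widened to int16 and folded in with vpadalq_s16, then a horizontal reduction that shows up twice per file, which is consistent with each generated file carrying both an A64 path (vpaddq_s32) and an A32 path (vget_low_s32/vget_high_s32 plus vadd_s32). Below is a sketch of one row by two columns; the function name, operand layout, and bias handling are simplifications of mine (the generated kernels seed the accumulators from the packed bias instead of adding it after the reduction).

    #include <arm_neon.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical one-row, two-column reduction of the c8 pattern.
     * kc must be a multiple of 8. */
    void qs8_c8_dot2_sketch(size_t kc, const int8_t* a, const int8_t* w,
                            const int32_t bias[2], int32_t out[2]) {
      /* Each column keeps 4 partial int32 sums; in the generated
       * kernels rows past row 0 start as copies of row 0 (hence
       * "int32x4_t vacc3x1 = vacc0x1"). */
      int32x4_t vacc0 = vdupq_n_s32(0);
      int32x4_t vacc1 = vdupq_n_s32(0);
      for (size_t k = 0; k < kc; k += 8) {
        const int8x8_t va  = vld1_s8(a); a += 8;
        const int8x8_t vb0 = vld1_s8(w); w += 8;
        const int8x8_t vb1 = vld1_s8(w); w += 8;
        /* 8 int8*int8 products widen to int16x8... */
        const int16x8_t vprod0 = vmull_s8(vb0, va);
        const int16x8_t vprod1 = vmull_s8(vb1, va);
        /* ...then fold pairwise into int32x4, matching
         * "vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1)". */
        vacc0 = vpadalq_s16(vacc0, vprod0);
        vacc1 = vpadalq_s16(vacc1, vprod1);
      }
    #if defined(__aarch64__)
      /* A64: pairwise-add neighboring column accumulators,
       * cf. "vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1)". */
      const int32x4_t vsum01 = vpaddq_s32(vacc0, vacc1);
      const int32x2_t vred = vpadd_s32(vget_low_s32(vsum01), vget_high_s32(vsum01));
    #else
      /* A32: halve each accumulator first, cf. "vpsum3x1 =
       * vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1))". */
      const int32x2_t vpsum0 = vadd_s32(vget_low_s32(vacc0), vget_high_s32(vacc0));
      const int32x2_t vpsum1 = vadd_s32(vget_low_s32(vacc1), vget_high_s32(vacc1));
      const int32x2_t vred = vpadd_s32(vpsum0, vpsum1);
    #endif
      int32_t vout[2];
      vst1_s32(vout, vred);
      out[0] = bias[0] + vout[0];   /* bias added post-reduction here */
      out[1] = bias[1] + vout[1];   /* for simplicity only */
    }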
/external/XNNPACK/src/qs8-gemm/gen/

D | 4x8c8-minmax-neon-mull-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal():
     87  int32x4_t vacc3x1 = vacc0x1;  (local)
    121  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    193  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
    249  const int32x2_t vpsum3x1 = vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));

D | 4x8c8-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal():
     87  int32x4_t vacc3x1 = vacc0x1;  (local)
    141  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    248  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    320  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
    376  const int32x2_t vpsum3x1 = vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));

D | 4x8c16-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal():
     87  int32x4_t vacc3x1 = vacc0x1;  (local)
    135  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    225  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
    281  const int32x2_t vpsum3x1 = vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));

D | 4x16c8-minmax-neon-mull-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal():
    111  int32x4_t vacc3x1 = vacc0x1;  (local)
    153  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    309  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
    419  const int32x2_t vpsum3x1 = vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));

D | 4x16c8-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal():
    111  int32x4_t vacc3x1 = vacc0x1;  (local)
    181  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    392  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    548  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
    658  const int32x2_t vpsum3x1 = vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));

D | 4x16c16-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal():
    111  int32x4_t vacc3x1 = vacc0x1;  (local)
    175  vacc3x1 = vpadalq_s16(vacc3x1, vprod3x1);
    373  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
    483  const int32x2_t vpsum3x1 = vadd_s32(vget_low_s32(vacc3x1), vget_high_s32(vacc3x1));
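The qs8-gemm matches mirror the igemm ones accumulator-for-accumulator; the visible difference between variants is that the mlal files log vpadalq_s16 twice per accumulator (e.g. lines 141 and 248 in the 4x8c8 file) where the mull files log it once. That is consistent with the mlal variants fusing two 8-channel blocks in int16 via vmlal_s8 in their main loop while keeping a plain mull step for an 8-channel remainder. A sketch of the two inner steps, with illustrative names that are not the generated kernels':

    #include <arm_neon.h>

    /* mull variant: one 8-channel block per widening pairwise add. */
    static inline int32x4_t qs8_step_mull(int32x4_t vacc,
                                          int8x8_t va, int8x8_t vb) {
      const int16x8_t vprod = vmull_s8(vb, va);
      return vpadalq_s16(vacc, vprod);   /* cf. vpadalq_s16(vacc3x1, vprod3x1) */
    }

    /* mlal/c16 variant: two 8-channel blocks are fused in int16 with
     * vmlal_s8 before a single vpadalq_s16, halving the widening adds. */
    static inline int32x4_t qs8_step_mlal(int32x4_t vacc,
                                          int8x8_t va_lo, int8x8_t va_hi,
                                          int8x8_t vb_lo, int8x8_t vb_hi) {
      int16x8_t vprod = vmull_s8(vb_lo, va_lo);
      vprod = vmlal_s8(vprod, vb_hi, va_hi);
      return vpadalq_s16(vacc, vprod);
    }

The c16 files show a single vpadalq_s16 hit per accumulator, which fits them taking the fused step unconditionally over 16-channel blocks.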