/external/XNNPACK/src/f32-prelu/gen/ |
D | wasmsimd-minmax-4x16.c | 107 v128_t vacc2x89AB = wasm_i32x4_max(vi2x89AB, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() local
|
D | neon-4x16.c | 101 float32x4_t vacc2x89AB = vmulq_f32(vi2x89AB, vw89AB); in xnn_f32_prelu_ukernel__neon_4x16() local
|
D | wasmsimd-bitselect-4x16.c | 107 v128_t vacc2x89AB = wasm_f32x4_mul(vi2x89AB, vw89AB); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() local
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c8-minmax-avx512skx.c | 77 __m512i vacc2x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx() local
|
D | 4x16c8-minmax-avx512skx.c | 83 __m512i vacc2x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx() local
|
D | 4x16c4-minmax-neondot.c | 74 int32x4_t vacc2x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() local
|
D | 3x16c8-minmax-neon-mull-padal.c | 264 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 343 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
|
D | 3x16c16-minmax-neon-mlal-padal.c | 312 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 391 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
|
D | 3x16-minmax-neon-mlal-lane.c | 65 int32x4_t vacc2x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane() local
|
D | 6x16c4-minmax-neondot.c | 86 int32x4_t vacc2x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local
|
D | 4x16-minmax-neon-mlal-lane.c | 71 int32x4_t vacc2x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane() local
|
D | 4x16c16-minmax-neon-mlal-padal.c | 391 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 474 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
|
D | 4x16c8-minmax-neon-mull-padal.c | 327 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 410 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c8-minmax-avx512skx.c | 74 __m512i vacc2x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx() local
|
D | 4x16c4-minmax-neondot.c | 69 int32x4_t vacc2x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() local
|
D | 4x16c8-minmax-avx512skx.c | 78 __m512i vacc2x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx() local
|
D | 6x16c4-minmax-neondot.c | 77 int32x4_t vacc2x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local
|
D | 3x16c8-minmax-neon-mull-padal.c | 282 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 361 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
|
D | 3x16c16-minmax-neon-mlal-padal.c | 330 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 409 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
|
D | 3x16-minmax-neon-mlal-lane.c | 64 int32x4_t vacc2x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane() local
|
D | 8x16c4-minmax-neondot.c | 85 int32x4_t vacc2x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local
|
D | 4x16-minmax-neon-mlal-lane.c | 68 int32x4_t vacc2x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane() local
|
D | 4x16c8-minmax-neon-mull-padal.c | 347 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 430 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
|
D | 4x16c16-minmax-neon-mlal-padal.c | 411 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 494 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
|
D | 3x16c8-minmax-neon-mlal-padal.c | 471 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 550 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
|