/external/XNNPACK/src/qu8-gemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c | 82 __m512i vacc3x89AB = vacc0x89AB; in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() local
|
D | 4x16c4-minmax-rndnu-neondot.c | 208 int32x4_t vacc3x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc3x89AB, vnacc3x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() local
|
D | 4x16c4-minmax-fp32-neondot.c | 209 int32x4_t vacc3x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc3x89AB, vnacc3x0123)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() local
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c | 82 __m512i vacc3x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() local
|
D | 4x16c4-minmax-rndnu-neondot.c | 79 int32x4_t vacc3x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() local
|
D | 4x16c4s2-minmax-rndnu-neon-mull.c | 270 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local 317 int32x4_t vacc3x89AB = vcombine_s32(vsum3x89, vsum3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
|
D | 6x16c4-minmax-rndnu-neondot.c | 91 int32x4_t vacc3x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() local
|
D | 4x16c4-minmax-rndnu-neon-mull-ld2r.c | 363 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local 410 int32x4_t vacc3x89AB = vcombine_s32(vsum3x89, vsum3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local
|
D | 4x16c4-minmax-rndnu-neon-mull-dup.c | 363 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local 410 int32x4_t vacc3x89AB = vcombine_s32(vsum3x89, vsum3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local
|
D | 4x16c4-minmax-rndnu-neon-mull-ld1r.c | 367 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local 414 int32x4_t vacc3x89AB = vcombine_s32(vsum3x89, vsum3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c | 81 __m512i vacc3x89AB = vacc0x89AB; in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() local
|
D | 4x16c4-minmax-fp32-neondot.c | 80 int32x4_t vacc3x89AB = vacc0x89AB; in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot() local
|
/external/XNNPACK/src/f32-prelu/gen/ |
D | wasmsimd-minmax-4x16.c | 116 v128_t vacc3x89AB = wasm_i32x4_max(vi3x89AB, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() local
|
D | neon-4x16.c | 110 float32x4_t vacc3x89AB = vmulq_f32(vi3x89AB, vw89AB); in xnn_f32_prelu_ukernel__neon_4x16() local
|
D | wasmsimd-bitselect-4x16.c | 115 v128_t vacc3x89AB = wasm_f32x4_mul(vi3x89AB, vw89AB); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() local
|
/external/XNNPACK/src/qu8-igemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c | 77 __m512i vacc3x89AB = vacc0x89AB; in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() local
|
D | 4x16c4-minmax-rndnu-neondot.c | 226 int32x4_t vacc3x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc3x89AB, vnacc3x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() local
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c | 77 __m512i vacc3x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() local
|
D | 4x16c4-minmax-rndnu-neondot.c | 73 int32x4_t vacc3x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() local
|
D | 4x16c4s2-minmax-rndnu-neon-mull.c | 290 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local 337 int32x4_t vacc3x89AB = vcombine_s32(vsum3x89, vsum3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
|
D | 6x16c4-minmax-rndnu-neondot.c | 81 int32x4_t vacc3x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() local
|
D | 4x16c4-minmax-rndnu-neon-mull-ld2r.c | 382 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local 429 int32x4_t vacc3x89AB = vcombine_s32(vsum3x89, vsum3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local
|
D | 4x16c4-minmax-rndnu-neon-mull-ld1r.c | 386 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local 433 int32x4_t vacc3x89AB = vcombine_s32(vsum3x89, vsum3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c | 76 __m512i vacc3x89AB = vacc0x89AB; in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() local
|
D | 4x16c4-minmax-fp32-neondot.c | 74 int32x4_t vacc3x89AB = vacc0x89AB; in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot() local
|