/external/XNNPACK/src/qs8-requantization/ |
D | rndnu-neon-mull.c | 84 …const int32x4_t w_scaled = vuzp1q_s32(vreinterpretq_s32_s64(w01_scaled), vreinterpretq_s32_s64(w23… in xnn_qs8_requantize_rndnu__neon_mull() local 87 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin… in xnn_qs8_requantize_rndnu__neon_mull() 93 const int32x4_t w_scaled = vcombine_s32(vmovn_s64(w01_scaled), vmovn_s64(w23_scaled)); in xnn_qs8_requantize_rndnu__neon_mull() local 96 …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v… in xnn_qs8_requantize_rndnu__neon_mull()
|
D | rndnu-neon-qdmulh.c | 71 const int32x4_t w_scaled = vrshlq_s32(w_product, vpost_shift); in xnn_qs8_requantize_rndnu__neon_qdmulh() local 75 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin… in xnn_qs8_requantize_rndnu__neon_qdmulh() 79 …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v… in xnn_qs8_requantize_rndnu__neon_qdmulh()
|
D | gemmlowp-neon.c | 79 const int32x4_t w_scaled = vrshlq_s32(w_adjusted_product, vshift); in xnn_qs8_requantize_gemmlowp__neon() local 83 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin… in xnn_qs8_requantize_gemmlowp__neon() 87 …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v… in xnn_qs8_requantize_gemmlowp__neon()
|
D | fp32-neon.c | 59 const float32x4_t w_scaled = vmulq_f32(vcvtq_f32_s32(w), vscale); in xnn_qs8_requantize_fp32__neon() local 68 const int32x4_t w_rounded = vcvtnq_s32_f32(w_scaled); in xnn_qs8_requantize_fp32__neon() 117 const float32x4_t w_clamped = vminq_f32(vmaxq_f32(w_scaled, vfmin), vfmax); in xnn_qs8_requantize_fp32__neon()
|
D | rndna-neon.c | 112 …const int32x4_t w_scaled = vuzp1q_s32(vreinterpretq_s32_s64(w01_scaled), vreinterpretq_s32_s64(w23… in xnn_qs8_requantize_rndna__neon() local 115 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin… in xnn_qs8_requantize_rndna__neon() 121 const int32x4_t w_scaled = vcombine_s32(vmovn_s64(w01_scaled), vmovn_s64(w23_scaled)); in xnn_qs8_requantize_rndna__neon() local 124 …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v… in xnn_qs8_requantize_rndna__neon()
|
D | fp32-scalar-fmagic.c | 45 const float w_scaled = (float) w * scale; in xnn_qs8_requantize_fp32__scalar_fmagic() local 50 const float w_clamped = math_min_f32(math_max_f32(w_scaled, fmin), fmax); in xnn_qs8_requantize_fp32__scalar_fmagic()
|
D | fp32-sse4.c | 51 const __m128 w_scaled = _mm_mul_ps(_mm_cvtepi32_ps(w), vscale); in xnn_qs8_requantize_fp32__sse4() local 67 const __m128i w_rounded = _mm_cvtps_epi32(w_scaled); in xnn_qs8_requantize_fp32__sse4()
|
D | fp32-scalar-lrintf.c | 43 const float w_scaled = (float) w * scale; in xnn_qs8_requantize_fp32__scalar_lrintf() local 48 const float w_clamped = math_min_f32(math_max_f32(w_scaled, fmin), fmax); in xnn_qs8_requantize_fp32__scalar_lrintf()
|
D | fp32-sse2.c | 51 const __m128 w_scaled = _mm_mul_ps(_mm_cvtepi32_ps(w), vscale); in xnn_qs8_requantize_fp32__sse2() local 67 const __m128i w_rounded = _mm_cvtps_epi32(w_scaled); in xnn_qs8_requantize_fp32__sse2()
|
D | fp32-wasmsimd.c | 49 const v128_t w_scaled = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(w), vscale); in xnn_qs8_requantize_fp32__wasmsimd() local 61 const v128_t w_clamped = wasm_f32x4_min(wasm_f32x4_max(w_scaled, vfmin), vfmax); in xnn_qs8_requantize_fp32__wasmsimd()
|
D | rndnu-scalar.c | 63 const int32_t w_scaled = (int32_t) math_asr_s64(w_product + rounding, shift); in xnn_qs8_requantize_rndnu__scalar() local 69 const int32_t w_clamped = math_min_s32(math_max_s32(w_scaled, smin), smax); in xnn_qs8_requantize_rndnu__scalar()
|
D | rndna-scalar-signed64.c | 69 const int32_t w_scaled = (int32_t) math_asr_s64(w_adjusted_product + rounding, shift); in xnn_qs8_requantize_rndna__scalar_signed64() local 75 const int32_t w_clamped = math_min_s32(math_max_s32(w_scaled, smin), smax); in xnn_qs8_requantize_rndna__scalar_signed64()
|
D | rndna-scalar-unsigned64.c | 76 const int32_t w_scaled = (int32_t) (w >= 0 ? w_abs_scaled : -w_abs_scaled); in xnn_qs8_requantize_rndna__scalar_unsigned64() local 82 const int32_t w_clamped = math_min_s32(math_max_s32(w_scaled, smin), smax); in xnn_qs8_requantize_rndna__scalar_unsigned64()
|
D | gemmlowp-scalar.c | 104 … const int32_t w_scaled = math_asr_s32(w_q31product, shift) + (int32_t) (w_remainder > threshold); in xnn_qs8_requantize_gemmlowp__scalar() local 110 const int32_t w_clamped = math_min_s32(math_max_s32(w_scaled, smin), smax); in xnn_qs8_requantize_gemmlowp__scalar()
|
D | rndnu-sse4-sra.c | 81 const __m128i w_scaled = _mm_sra_epi32(w_prescaled, vshift); in xnn_qs8_requantize_rndnu__sse4_sra() local 84 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_rndnu__sse4_sra()
|
/external/XNNPACK/src/qu8-requantization/ |
D | gemmlowp-neon.c | 79 const int32x4_t w_scaled = vrshlq_s32(w_adjusted_product, vshift); in xnn_qu8_requantize_gemmlowp__neon() local 83 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin… in xnn_qu8_requantize_gemmlowp__neon() 87 …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v… in xnn_qu8_requantize_gemmlowp__neon()
|
D | fp32-neon.c | 59 const float32x4_t w_scaled = vmulq_f32(vcvtq_f32_s32(w), vscale); in xnn_qu8_requantize_fp32__neon() local 68 const int32x4_t w_rounded = vcvtnq_s32_f32(w_scaled); in xnn_qu8_requantize_fp32__neon() 93 const float32x4_t w_clamped = vminq_f32(vmaxq_f32(w_scaled, vfmin), vfmax); in xnn_qu8_requantize_fp32__neon()
|
D | rndna-neon.c | 112 …const int32x4_t w_scaled = vuzp1q_s32(vreinterpretq_s32_s64(w01_scaled), vreinterpretq_s32_s64(w23… in xnn_qu8_requantize_rndna__neon() local 115 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin… in xnn_qu8_requantize_rndna__neon() 121 const int32x4_t w_scaled = vcombine_s32(vmovn_s64(w01_scaled), vmovn_s64(w23_scaled)); in xnn_qu8_requantize_rndna__neon() local 124 …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v… in xnn_qu8_requantize_rndna__neon()
|
D | fp32-sse2.c | 51 const __m128 w_scaled = _mm_mul_ps(_mm_cvtepi32_ps(w), vscale); in xnn_qu8_requantize_fp32__sse2() local 67 const __m128i w_rounded = _mm_cvtps_epi32(w_scaled); in xnn_qu8_requantize_fp32__sse2()
|
D | fp32-scalar-fmagic.c | 45 const float w_scaled = (float) w * scale; in xnn_qu8_requantize_fp32__scalar_fmagic() local 50 const float w_clamped = math_min_f32(math_max_f32(w_scaled, fmin), fmax); in xnn_qu8_requantize_fp32__scalar_fmagic()
|
D | fp32-scalar-lrintf.c | 43 const float w_scaled = (float) w * scale; in xnn_qu8_requantize_fp32__scalar_lrintf() local 48 const float w_clamped = math_min_f32(math_max_f32(w_scaled, fmin), fmax); in xnn_qu8_requantize_fp32__scalar_lrintf()
|
D | fp32-wasmsimd.c | 49 const v128_t w_scaled = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(w), vscale); in xnn_qu8_requantize_fp32__wasmsimd() local 61 const v128_t w_clamped = wasm_f32x4_min(wasm_f32x4_max(w_scaled, vfmin), vfmax); in xnn_qu8_requantize_fp32__wasmsimd()
|
D | rndna-scalar-signed64.c | 69 const int32_t w_scaled = (int32_t) math_asr_s64(w_adjusted_product + rounding, shift); in xnn_qu8_requantize_rndna__scalar_signed64() local 75 const int32_t w_clamped = math_min_s32(math_max_s32(w_scaled, smin), smax); in xnn_qu8_requantize_rndna__scalar_signed64()
|
D | rndna-scalar-unsigned64.c | 76 const int32_t w_scaled = (int32_t) (w >= 0 ? w_abs_scaled : -w_abs_scaled); in xnn_qu8_requantize_rndna__scalar_unsigned64() local 82 const int32_t w_clamped = math_min_s32(math_max_s32(w_scaled, smin), smax); in xnn_qu8_requantize_rndna__scalar_unsigned64()
|
D | gemmlowp-scalar.c | 104 … const int32_t w_scaled = math_asr_s32(w_q31product, shift) + (int32_t) (w_remainder > threshold); in xnn_qu8_requantize_gemmlowp__scalar() local 110 const int32_t w_clamped = math_min_s32(math_max_s32(w_scaled, smin), smax); in xnn_qu8_requantize_gemmlowp__scalar()
|