/external/XNNPACK/src/qu8-requantization/
fp32-scalar-magic.c, xnn_qu8_requantize_fp32__scalar_magic():
  45 (def)  const float z_scaled = (float) z * scale;
  50        const float z_clamped = z_scaled < fmin ? fmin : z_scaled > fmax ? fmax : z_scaled;

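These two lines are the scale-and-clamp half of the fp32 "magic" scheme: the int32 accumulator is scaled in float, clamped to the representable output range, and then rounded by adding a large constant so the integer result lands in the low mantissa bits. A minimal single-value sketch, assuming qu8 bounds; the helper names (requantize_fp32_magic, fp32_bits) are mine:

    #include <stdint.h>
    #include <string.h>

    // Hypothetical helper mirroring XNNPACK's fp32_to_bits: reinterpret float bits.
    static inline uint32_t fp32_bits(float f) {
      uint32_t bits;
      memcpy(&bits, &f, sizeof(bits));
      return bits;
    }

    // 12582912.0f is 0x1.8p+23; adding it forces round-to-nearest into the mantissa.
    static uint8_t requantize_fp32_magic(int32_t z, float scale, uint8_t zero_point) {
      const float fmin = (float) (0 - (int32_t) zero_point);
      const float fmax = (float) (255 - (int32_t) zero_point);
      const float fmagic = 12582912.0f;
      const int32_t imagic = INT32_C(0x4B400000) - (int32_t) zero_point;

      const float z_scaled = (float) z * scale;
      const float z_clamped = z_scaled < fmin ? fmin : z_scaled > fmax ? fmax : z_scaled;
      // Bits of (z_clamped + fmagic) equal 0x4B400000 + round(z_clamped), so
      // subtracting imagic yields round(z_clamped) + zero_point in [0, 255].
      return (uint8_t) ((int32_t) fp32_bits(z_clamped + fmagic) - imagic);
    }
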
precise-scalar-signed64.c, xnn_qu8_requantize_precise__scalar_signed64():
  70 (def)  const int32_t z_scaled = (int32_t) asr_s64(z_adjusted_product + rounding, shift);
  76        const int32_t z_clamped = z_scaled < smin ? smin : z_scaled > smax ? smax : z_scaled;

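The "precise" signed-64-bit variant computes the full 64-bit product and rounds with an adjusted shift. A single-value sketch, assuming the usual decomposition scale = multiplier * 2^-shift with multiplier in [2^30, 2^31) and shift >= 1; the helper name is mine:

    #include <stdint.h>

    // Arithmetic right shift on int64_t; >> on negative signed values is
    // implementation-defined in older C standards, so XNNPACK wraps it (asr_s64).
    // Two's-complement behavior is assumed here.
    static inline int64_t asr_s64(int64_t x, uint32_t n) {
      return x >> n;
    }

    static int32_t requantize_precise_s64(int32_t z, int32_t multiplier, uint32_t shift) {
      const int64_t z_product = (int64_t) z * (int64_t) multiplier;
      // Subtracting 1 from negative products turns the half-up rounding of
      // "add 2^(shift-1), then shift" into round-half-away-from-zero.
      const int64_t z_adjusted_product = z_product - (int64_t) (z < 0);
      const int64_t rounding = INT64_C(1) << (shift - 1);
      return (int32_t) asr_s64(z_adjusted_product + rounding, shift);
    }
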
precise-scalar-unsigned64.c, xnn_qu8_requantize_precise__scalar_unsigned64():
  77 (def)  const int32_t z_scaled = (int32_t)(z >= 0 ? z_abs_scaled : -z_abs_scaled);
  83        const int32_t z_clamped = z_scaled < smin ? smin : z_scaled > smax ? smax : z_scaled;

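Here the kernel scales |z| in unsigned arithmetic and restores the sign afterwards; rounding the magnitude half-up is the same as rounding the signed value half-away-from-zero. A single-value sketch (the unsigned32 variant below is the same idea with the 64-bit multiply split into 32-bit pieces); the function name is an assumption:

    #include <stdint.h>

    static int32_t requantize_precise_unsigned(int32_t z, uint32_t multiplier, uint32_t shift) {
      const uint64_t rounding = UINT64_C(1) << (shift - 1);
      // |z| computed in unsigned math so z = INT32_MIN does not overflow.
      const uint32_t z_abs = (z >= 0) ? (uint32_t) z : -(uint32_t) z;
      const uint64_t z_abs_product = (uint64_t) z_abs * (uint64_t) multiplier;
      const uint32_t z_abs_scaled = (uint32_t) ((z_abs_product + rounding) >> shift);
      // Restore the original sign of z on the rounded magnitude.
      return (int32_t) (z >= 0 ? z_abs_scaled : -z_abs_scaled);
    }
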
q31-scalar.c, xnn_qu8_requantize_q31__scalar():
  105 (def)  const int32_t z_scaled = asr_s32(z_q31product, shift) + (int32_t)(z_remainder > threshold);
  111        const int32_t z_clamped = z_scaled < smin ? smin : z_scaled > smax ? smax : z_scaled;

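The Q31 variant applies the scale as a gemmlowp-style fixed-point multiplier in [2^30, 2^31): a rounding doubling high multiply followed by a rounding arithmetic shift. A single-value sketch, with helper names of my own:

    #include <stdint.h>

    // Arithmetic right shift on int32_t, assuming two's complement (XNNPACK's asr_s32).
    static inline int32_t asr_s32(int32_t x, uint32_t n) {
      return x >> n;
    }

    static int32_t requantize_q31(int32_t z, int32_t multiplier, uint32_t shift) {
      // High 32 bits of 2*z*multiplier with rounding: the scalar equivalent of
      // NEON's VQRDMULH (saturation omitted; it cannot trigger for positive multipliers).
      const int64_t z_product = (int64_t) z * (int64_t) multiplier;
      const int32_t z_q31product =
          (int32_t) (uint32_t) ((uint64_t) (z_product + INT64_C(0x40000000)) >> 31);
      // Rounding right shift: the discarded low bits are compared against a
      // threshold lowered by one for negative products, so ties round away from zero.
      const int32_t remainder_mask = (INT32_C(1) << shift) - 1;
      const int32_t z_remainder = (z_q31product & remainder_mask) - (int32_t) (z_q31product < 0);
      const int32_t threshold = remainder_mask >> 1;
      return asr_s32(z_q31product, shift) + (int32_t) (z_remainder > threshold);
    }
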
precise-scalar-unsigned32.c, xnn_qu8_requantize_precise__scalar_unsigned32():
  105 (def)  const int32_t z_scaled = (int32_t)(z >= 0 ? z_abs_scaled : -z_abs_scaled);
  111        const int32_t z_clamped = z_scaled < smin ? smin : z_scaled > smax ? smax : z_scaled;

q31-neon.c, xnn_qu8_requantize_q31__neon():
  79 (def)  const int32x4_t z_scaled = vrshlq_s32(z_adjusted_product, vshift);
  84        …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin…
  88        …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v…

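On NEON the same Q31 scheme maps onto two instructions. A four-lane sketch of the core; the tie-breaking adjustment XNNPACK applies between the multiply and the shift (which is what z_adjusted_product above carries) is omitted here, and the function name is mine:

    #include <arm_neon.h>

    // vshift holds the negated shift amount, so vrshlq_s32 performs a
    // rounding arithmetic right shift.
    static int16x4_t requantize_q31_neon_sketch(int32x4_t z, int32x4_t vmultiplier,
                                                int32x4_t vshift) {
      // Saturating rounding doubling high multiply: high half of 2*z*multiplier.
      const int32x4_t z_product = vqrdmulhq_s32(z, vmultiplier);
      const int32x4_t z_scaled = vrshlq_s32(z_product, vshift);
      // Saturating narrow; the zero point is added in int16, as in lines 84/88 above.
      return vqmovn_s32(z_scaled);
    }
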
fp32-neon.c, xnn_qu8_requantize_fp32__neon():
  57 (def)  const float32x4_t z_scaled = vmulq_f32(vcvtq_f32_s32(z), vscale);
  66        const int32x4_t z_rounded = vcvtnq_s32_f32(z_scaled);
  91        const float32x4_t z_clamped = vminq_f32(vmaxq_f32(z_scaled, vfmin), vfmax);

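An eight-value sketch of the ARMv8 path, where vcvtnq_s32_f32 provides round-to-nearest-even directly; pre-ARMv8 kernels instead clamp in float (line 91 above) and fall back to the magic-number trick. Parameter names beyond those in the listing are assumptions:

    #include <arm_neon.h>

    static uint8x8_t requantize_fp32_neon_sketch(int32x4_t z, int32x4_t w,
                                                 float32x4_t vscale, int16x8_t vzero_point,
                                                 uint8x8_t vqmin, uint8x8_t vqmax) {
      const float32x4_t z_scaled = vmulq_f32(vcvtq_f32_s32(z), vscale);
      const float32x4_t w_scaled = vmulq_f32(vcvtq_f32_s32(w), vscale);
      const int32x4_t z_rounded = vcvtnq_s32_f32(z_scaled);
      const int32x4_t w_rounded = vcvtnq_s32_f32(w_scaled);
      // Saturating narrow to int16, add the zero point, then narrow to u8 and clamp.
      const int16x8_t zw_packed = vqaddq_s16(
          vcombine_s16(vqmovn_s32(z_rounded), vqmovn_s32(w_rounded)), vzero_point);
      return vmax_u8(vmin_u8(vqmovun_s16(zw_packed), vqmax), vqmin);
    }
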
precise-neon.c, xnn_qu8_requantize_precise__neon():
  112 (def)  …const int32x4_t z_scaled = vuzp1q_s32(vreinterpretq_s32_s64(z01_scaled), vreinterpretq_s32_s64(z23…
  116        …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin…
  121 (def)  const int32x4_t z_scaled = vcombine_s32(vmovn_s64(z01_scaled), vmovn_s64(z23_scaled));
  125        …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v…

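The two definitions are the same narrowing step on different architectures: after the rounding shift each 64-bit scaled product fits in its low 32 bits, so AArch64 can gather the even 32-bit lanes with vuzp1q_s32 (little-endian lane order assumed), while ARMv7 narrows each pair with vmovn_s64. A sketch with a function name of my own:

    #include <arm_neon.h>

    static int32x4_t narrow_scaled_products(int64x2_t z01_scaled, int64x2_t z23_scaled) {
    #if defined(__aarch64__)
      // Even 32-bit lanes of the two vectors are the low halves of the int64 lanes.
      return vuzp1q_s32(vreinterpretq_s32_s64(z01_scaled), vreinterpretq_s32_s64(z23_scaled));
    #else
      // ARMv7: narrow each 64-bit lane to 32 bits, then combine.
      return vcombine_s32(vmovn_s64(z01_scaled), vmovn_s64(z23_scaled));
    #endif
    }
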
fp32-sse2.c, xnn_qu8_requantize_fp32__sse2():
  50 (def)  const __m128 z_scaled = _mm_mul_ps(_mm_cvtepi32_ps(z), vscale);
  66        const __m128i z_rounded = _mm_cvtps_epi32(z_scaled);

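On SSE2, _mm_cvtps_epi32 rounds according to MXCSR, which is round-to-nearest-even by default, so no magic constant is needed and range clamping falls out of the saturating packs. An eight-value sketch; the function name is an assumption:

    #include <emmintrin.h>

    static __m128i requantize_fp32_sse2_sketch(__m128i z, __m128i w,
                                               __m128 vscale, __m128i vzero_point) {
      const __m128 z_scaled = _mm_mul_ps(_mm_cvtepi32_ps(z), vscale);
      const __m128 w_scaled = _mm_mul_ps(_mm_cvtepi32_ps(w), vscale);
      // Round to nearest-even under the default MXCSR rounding mode.
      const __m128i z_rounded = _mm_cvtps_epi32(z_scaled);
      const __m128i w_rounded = _mm_cvtps_epi32(w_scaled);
      // Saturating pack to int16, add the zero point, then pack to uint8.
      const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_rounded, w_rounded), vzero_point);
      return _mm_packus_epi16(zw_packed, zw_packed);
    }
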
fp32-scalar-lrintf.c, xnn_qu8_requantize_fp32__scalar_lrintf():
  43 (def)  const float z_scaled = (float) z * scale;
  48        const long z_rounded = lrintf(z_scaled);

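The lrintf variant rounds in the current floating-point rounding mode (nearest-even by default) and clamps the already-rounded long value. A single-value sketch, assuming qu8 bounds; the function name is mine:

    #include <math.h>
    #include <stdint.h>

    static uint8_t requantize_fp32_lrintf_sketch(int32_t z, float scale, uint8_t zero_point) {
      const long lmin = (long) (0 - (int32_t) zero_point);
      const long lmax = (long) (255 - (int32_t) zero_point);
      const float z_scaled = (float) z * scale;
      const long z_rounded = lrintf(z_scaled);
      // Clamp after rounding, then re-add the zero point.
      const long z_clamped = z_rounded < lmin ? lmin : z_rounded > lmax ? lmax : z_rounded;
      return (uint8_t) ((int32_t) z_clamped + (int32_t) zero_point);
    }
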
fp32-wasmsimd.c, xnn_qu8_requantize_fp32__wasmsimd():
  48 (def)  const v128_t z_scaled = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(z), vscale);
  60        const v128_t z_clamped = wasm_f32x4_min(wasm_f32x4_max(z_scaled, vfmin), vfmax);

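WebAssembly SIMD only offers a truncating float-to-int conversion, so this kernel clamps in float and reuses the scalar magic-number trick. A four-lane sketch; vfmagic/vimagic mirror the scalar fmagic/imagic constants and the names are assumptions. v128_t is typeless, so the float result of the add is reinterpreted as i32 lanes by the subtract:

    #include <wasm_simd128.h>

    static v128_t requantize_fp32_wasmsimd_sketch(v128_t z, v128_t vscale,
                                                  v128_t vfmin, v128_t vfmax,
                                                  v128_t vfmagic, v128_t vimagic) {
      const v128_t z_scaled = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(z), vscale);
      const v128_t z_clamped = wasm_f32x4_min(wasm_f32x4_max(z_scaled, vfmin), vfmax);
      // Lanes now hold round(z_clamped) + zero_point as i32; the real kernel
      // narrows them to 8 bits afterwards.
      return wasm_i32x4_sub(wasm_f32x4_add(z_clamped, vfmagic), vimagic);
    }
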
precise-sse4.c, xnn_qu8_requantize_precise__sse4():
  90 (def)  const __m128i z_scaled = _mm_sign_epi32(z_abs_scaled, z);
  94        const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point);

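The SSE4 "precise" kernel scales |z| in unsigned arithmetic, like the scalar unsigned variants, and then restores the sign with a single SSSE3 instruction. A sketch of that step; the function name is mine:

    #include <smmintrin.h>

    // _mm_sign_epi32(a, b): negates lanes of a where b < 0, zeroes lanes where
    // b == 0 (harmless here, since |0| scales to 0), passes them through otherwise.
    static __m128i restore_sign_and_pack(__m128i z, __m128i w,
                                         __m128i z_abs_scaled, __m128i w_abs_scaled,
                                         __m128i vzero_point) {
      const __m128i z_scaled = _mm_sign_epi32(z_abs_scaled, z);
      const __m128i w_scaled = _mm_sign_epi32(w_abs_scaled, w);
      // Saturating pack to int16 plus zero-point addition, as in line 94 above.
      return _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point);
    }
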
/external/XNNPACK/src/qs8-requantization/ |
fp32-scalar-magic.c, xnn_qs8_requantize_fp32__scalar_magic():
  45 (def)  const float z_scaled = (float) z * scale;
  50        const float z_clamped = z_scaled < fmin ? fmin : z_scaled > fmax ? fmax : z_scaled;

precise-scalar-signed64.c, xnn_qs8_requantize_precise__scalar_signed64():
  70 (def)  const int32_t z_scaled = (int32_t) asr_s64(z_adjusted_product + rounding, shift);
  76        const int32_t z_clamped = z_scaled < smin ? smin : z_scaled > smax ? smax : z_scaled;

precise-scalar-unsigned64.c, xnn_qs8_requantize_precise__scalar_unsigned64():
  77 (def)  const int32_t z_scaled = (int32_t) (z >= 0 ? z_abs_scaled : -z_abs_scaled);
  83        const int32_t z_clamped = z_scaled < smin ? smin : z_scaled > smax ? smax : z_scaled;

q31-scalar.c, xnn_qs8_requantize_q31__scalar():
  105 (def)  const int32_t z_scaled = asr_s32(z_q31product, shift) + (int32_t) (z_remainder > threshold);
  111        const int32_t z_clamped = z_scaled < smin ? smin : z_scaled > smax ? smax : z_scaled;

precise-scalar-unsigned32.c, xnn_qs8_requantize_precise__scalar_unsigned32():
  105 (def)  const int32_t z_scaled = (int32_t) (z >= 0 ? z_abs_scaled : -z_abs_scaled);
  111        const int32_t z_clamped = z_scaled < smin ? smin : z_scaled > smax ? smax : z_scaled;

q31-neon.c, xnn_qs8_requantize_q31__neon():
  79 (def)  const int32x4_t z_scaled = vrshlq_s32(z_adjusted_product, vshift);
  84        …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin…
  88        …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v…

fp32-neon.c, xnn_qs8_requantize_fp32__neon():
  57 (def)  const float32x4_t z_scaled = vmulq_f32(vcvtq_f32_s32(z), vscale);
  66        const int32x4_t z_rounded = vcvtnq_s32_f32(z_scaled);
  115       const float32x4_t z_clamped = vminq_f32(vmaxq_f32(z_scaled, vfmin), vfmax);

precise-neon.c, xnn_qs8_requantize_precise__neon():
  112 (def)  …const int32x4_t z_scaled = vuzp1q_s32(vreinterpretq_s32_s64(z01_scaled), vreinterpretq_s32_s64(z23…
  116        …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin…
  121 (def)  const int32x4_t z_scaled = vcombine_s32(vmovn_s64(z01_scaled), vmovn_s64(z23_scaled));
  125        …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v…

fp32-sse4.c, xnn_qs8_requantize_fp32__sse4():
  50 (def)  const __m128 z_scaled = _mm_mul_ps(_mm_cvtepi32_ps(z), vscale);
  66        const __m128i z_rounded = _mm_cvtps_epi32(z_scaled);

fp32-scalar-lrintf.c, xnn_qs8_requantize_fp32__scalar_lrintf():
  43 (def)  const float z_scaled = (float) z * scale;
  48        const long z_rounded = lrintf(z_scaled);

fp32-sse2.c, xnn_qs8_requantize_fp32__sse2():
  50 (def)  const __m128 z_scaled = _mm_mul_ps(_mm_cvtepi32_ps(z), vscale);
  66        const __m128i z_rounded = _mm_cvtps_epi32(z_scaled);

fp32-wasmsimd.c, xnn_qs8_requantize_fp32__wasmsimd():
  48 (def)  const v128_t z_scaled = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(z), vscale);
  60        const v128_t z_clamped = wasm_f32x4_min(wasm_f32x4_max(z_scaled, vfmin), vfmax);

precise-sse4.c, xnn_qs8_requantize_precise__sse4():
  90 (def)  const __m128i z_scaled = _mm_sign_epi32(z_abs_scaled, z);
  94        const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point);

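The qs8 listings above mirror the qu8 kernels file for file; the scaling and rounding code is identical, and the variants diverge only in the output range, which shows up in the final 8-bit pack. A sketch of that difference on the SSE4 path; the function name is an assumption:

    #include <smmintrin.h>

    // zw_packed holds int16 lanes with the zero point already added; only the
    // final narrowing differs between the signed and unsigned kernels.
    static __m128i pack_output(__m128i zw_packed, int is_signed) {
      return is_signed ? _mm_packs_epi16(zw_packed, zw_packed)    // qs8: saturate to [-128, 127]
                       : _mm_packus_epi16(zw_packed, zw_packed);  // qu8: saturate to [0, 255]
    }
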