/external/XNNPACK/src/qs8-requantization/ |
D | fp32-neon.c | 76 const int8x16_t xyzw_packed = vqmovn_high_s16(vqmovn_s16(xy_packed), zw_packed); in xnn_qs8_requantize_fp32__neon() local 77 const int8x16_t xyzw_clamped = vmaxq_s8(vminq_s8(xyzw_packed, vqmax), vqmin); in xnn_qs8_requantize_fp32__neon() 129 const int8x16_t xyzw_packed = vcombine_s8(vmovn_s16(xy_packed), vmovn_s16(zw_packed)); in xnn_qs8_requantize_fp32__neon() local 131 vst1q_s8(output, xyzw_packed); output += 16; in xnn_qs8_requantize_fp32__neon()
|
D | q31-neon.c | 85 const int8x16_t xyzw_packed = vqmovn_high_s16(vqmovn_s16(xy_packed), zw_packed); in xnn_qs8_requantize_q31__neon() local 89 const int8x16_t xyzw_packed = vcombine_s8(vqmovn_s16(xy_packed), vqmovn_s16(zw_packed)); in xnn_qs8_requantize_q31__neon() local 92 const int8x16_t xyzw_clamped = vmaxq_s8(vminq_s8(xyzw_packed, vqmax), vqmin); in xnn_qs8_requantize_q31__neon()
|
D | fp32-sse4.c | 76 const __m128i xyzw_packed = _mm_packs_epi16(xy_packed, zw_packed); in xnn_qs8_requantize_fp32__sse4() local 77 const __m128i xyzw_clamped = _mm_max_epi8(_mm_min_epi8(xyzw_packed, vqmax), vqmin); in xnn_qs8_requantize_fp32__sse4()
|
D | fp32-wasmsimd.c | 74 …const v128_t xyzw_packed = wasm_v8x16_shuffle(xy_packed, zw_packed, 0, 2, 4, 6, 8, 10, 12, 14, 16,… in xnn_qs8_requantize_fp32__wasmsimd() local 86 wasm_v128_store(output, xyzw_packed); in xnn_qs8_requantize_fp32__wasmsimd()
|
D | precise-neon.c | 117 const int8x16_t xyzw_packed = vqmovn_high_s16(vqmovn_s16(xy_packed), zw_packed); in xnn_qs8_requantize_precise__neon() local 126 const int8x16_t xyzw_packed = vcombine_s8(vqmovn_s16(xy_packed), vqmovn_s16(zw_packed)); in xnn_qs8_requantize_precise__neon() local 129 const int8x16_t xyzw_clamped = vmaxq_s8(vminq_s8(xyzw_packed, vqmax), vqmin); in xnn_qs8_requantize_precise__neon()
|
D | precise-sse4.c | 95 const __m128i xyzw_packed = _mm_packs_epi16(xy_packed, zw_packed); in xnn_qs8_requantize_precise__sse4() local 96 const __m128i xyzw_clamped = _mm_max_epi8(_mm_min_epi8(xyzw_packed, vqmax), vqmin); in xnn_qs8_requantize_precise__sse4()
|
D | q31-sse4.c | 111 const __m128i xyzw_packed = _mm_packs_epi16(xy_packed, zw_packed); in xnn_qs8_requantize_q31__sse4() local 112 const __m128i xyzw_clamped = _mm_max_epi8(_mm_min_epi8(xyzw_packed, vqmax), vqmin); in xnn_qs8_requantize_q31__sse4()
|
D | q31-wasmsimd.c | 114 const v128_t xyzw_packed = wasm_i8x16_narrow_i16x8(xy_packed, zw_packed); in xnn_qs8_requantize_q31__wasmsimd() local 115 const v128_t xyzw_clamped = wasm_i8x16_min(wasm_i8x16_max(xyzw_packed, vqmin), vqmax); in xnn_qs8_requantize_q31__wasmsimd()
|
/external/XNNPACK/src/qu8-requantization/ |
D | fp32-neon.c | 76 const uint8x16_t xyzw_packed = vqmovun_high_s16(vqmovun_s16(xy_packed), zw_packed); in xnn_qu8_requantize_fp32__neon() local 77 const uint8x16_t xyzw_clamped = vmaxq_u8(vminq_u8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_fp32__neon() 105 …const uint8x16_t xyzw_packed = vreinterpretq_u8_s8(vcombine_s8(vmovn_s16(xy_packed), vmovn_s16(zw_… in xnn_qu8_requantize_fp32__neon() local 133 vst1q_u8(output, xyzw_packed); in xnn_qu8_requantize_fp32__neon()
|
D | q31-neon.c | 85 const uint8x16_t xyzw_packed = vqmovun_high_s16(vqmovun_s16(xy_packed), zw_packed); in xnn_qu8_requantize_q31__neon() local 89 const uint8x16_t xyzw_packed = vcombine_u8(vqmovun_s16(xy_packed), vqmovun_s16(zw_packed)); in xnn_qu8_requantize_q31__neon() local 92 const uint8x16_t xyzw_clamped = vmaxq_u8(vminq_u8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_q31__neon()
|
D | fp32-sse2.c | 76 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_fp32__sse2() local 77 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_fp32__sse2()
|
D | fp32-wasmsimd.c | 74 …const v128_t xyzw_packed = wasm_v8x16_shuffle(xy_packed, zw_packed, 0, 2, 4, 6, 8, 10, 12, 14, 16,… in xnn_qu8_requantize_fp32__wasmsimd() local 86 wasm_v128_store(output, xyzw_packed); in xnn_qu8_requantize_fp32__wasmsimd()
|
D | precise-neon.c | 117 const uint8x16_t xyzw_packed = vqmovun_high_s16(vqmovun_s16(xy_packed), zw_packed); in xnn_qu8_requantize_precise__neon() local 126 const uint8x16_t xyzw_packed = vcombine_u8(vqmovun_s16(xy_packed), vqmovun_s16(zw_packed)); in xnn_qu8_requantize_precise__neon() local 129 const uint8x16_t xyzw_clamped = vmaxq_u8(vminq_u8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_precise__neon()
|
D | precise-sse4.c | 95 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_precise__sse4() local 96 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_precise__sse4()
|
D | precise-ssse3.c | 103 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_precise__ssse3() local 104 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_precise__ssse3()
|
D | q31-sse4.c | 111 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_q31__sse4() local 112 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_q31__sse4()
|
D | precise-sse2.c | 108 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_precise__sse2() local 109 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_precise__sse2()
|
D | q31-wasmsimd.c | 114 const v128_t xyzw_packed = wasm_u8x16_narrow_i16x8(xy_packed, zw_packed); in xnn_qu8_requantize_q31__wasmsimd() local 115 const v128_t xyzw_clamped = wasm_u8x16_min(wasm_u8x16_max(xyzw_packed, vqmin), vqmax); in xnn_qu8_requantize_q31__wasmsimd()
|
D | q31-ssse3.c | 160 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_q31__ssse3() local 161 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_q31__ssse3()
|
D | q31-sse2.c | 160 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_q31__sse2() local 161 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_q31__sse2()
|