/external/XNNPACK/src/f16-f32-vcvt/gen/ |
D | vcvt-neon-int32-x8.c | 46 const uint32x4_t vnonsign_hi = veorq_u32(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__neon_int32_x8() local 49 …_t vnorm_hi = vmulq_f32(vreinterpretq_f32_u32(vsraq_n_u32(vexp_offset, vnonsign_hi, 3)), vexp_scal… in xnn_f16_f32_vcvt_ukernel__neon_int32_x8() 52 … vdenorm_hi = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_hi, 16)), vreinter… in xnn_f16_f32_vcvt_ukernel__neon_int32_x8() 57 const uint32x4_t vxmask_hi = vcgtq_u32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__neon_int32_x8() 73 const uint32x4_t vnonsign_hi = veorq_u32(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__neon_int32_x8() local 76 …_t vnorm_hi = vmulq_f32(vreinterpretq_f32_u32(vsraq_n_u32(vexp_offset, vnonsign_hi, 3)), vexp_scal… in xnn_f16_f32_vcvt_ukernel__neon_int32_x8() 79 … vdenorm_hi = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_hi, 16)), vreinter… in xnn_f16_f32_vcvt_ukernel__neon_int32_x8() 87 const uint32x4_t vxmask_hi = vcgtq_u32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__neon_int32_x8()
|
D | vcvt-wasmsimd-int32-x8.c | 48 const v128_t vnonsign_hi = wasm_v128_xor(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8() local 51 …const v128_t vnorm_hi = wasm_f32x4_mul(wasm_i32x4_add(wasm_u32x4_shr(vnonsign_hi, 3), vexp_offset)… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8() 54 …const v128_t vdenorm_hi = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_hi, 16), vmagic_bias… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8() 57 const v128_t vxmask_hi = wasm_i32x4_gt(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8() 79 const v128_t vnonsign_hi = wasm_v128_xor(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8() local 82 …const v128_t vnorm_hi = wasm_f32x4_mul(wasm_i32x4_add(wasm_u32x4_shr(vnonsign_hi, 3), vexp_offset)… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8() 85 …const v128_t vdenorm_hi = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_hi, 16), vmagic_bias… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8() 94 const v128_t vxmask_hi = wasm_i32x4_gt(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8()
|
D | vcvt-avx-int32-x8.c | 47 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() local 50 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() 53 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() 58 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() 75 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() local 78 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() 81 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() 90 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x8()
|
D | vcvt-sse41-int32-x8.c | 47 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() local 50 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() 53 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() 58 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() 75 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() local 78 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() 81 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() 90 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8()
|
D | vcvt-sse2-int32-x8.c | 47 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() local 50 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() 53 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() 59 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() 77 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() local 80 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() 83 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() 93 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8()
|
D | vcvt-neon-int32-x16.c | 90 const uint32x4_t vnonsign_hi = veorq_u32(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__neon_int32_x16() local 93 …_t vnorm_hi = vmulq_f32(vreinterpretq_f32_u32(vsraq_n_u32(vexp_offset, vnonsign_hi, 3)), vexp_scal… in xnn_f16_f32_vcvt_ukernel__neon_int32_x16() 96 … vdenorm_hi = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_hi, 16)), vreinter… in xnn_f16_f32_vcvt_ukernel__neon_int32_x16() 101 const uint32x4_t vxmask_hi = vcgtq_u32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__neon_int32_x16() 117 const uint32x4_t vnonsign_hi = veorq_u32(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__neon_int32_x16() local 120 …_t vnorm_hi = vmulq_f32(vreinterpretq_f32_u32(vsraq_n_u32(vexp_offset, vnonsign_hi, 3)), vexp_scal… in xnn_f16_f32_vcvt_ukernel__neon_int32_x16() 123 … vdenorm_hi = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_hi, 16)), vreinter… in xnn_f16_f32_vcvt_ukernel__neon_int32_x16() 131 const uint32x4_t vxmask_hi = vcgtq_u32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__neon_int32_x16()
|
D | vcvt-wasmsimd-int32-x16.c | 95 const v128_t vnonsign_hi = wasm_v128_xor(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16() local 98 …const v128_t vnorm_hi = wasm_f32x4_mul(wasm_i32x4_add(wasm_u32x4_shr(vnonsign_hi, 3), vexp_offset)… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16() 101 …const v128_t vdenorm_hi = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_hi, 16), vmagic_bias… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16() 104 const v128_t vxmask_hi = wasm_i32x4_gt(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16() 126 const v128_t vnonsign_hi = wasm_v128_xor(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16() local 129 …const v128_t vnorm_hi = wasm_f32x4_mul(wasm_i32x4_add(wasm_u32x4_shr(vnonsign_hi, 3), vexp_offset)… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16() 132 …const v128_t vdenorm_hi = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_hi, 16), vmagic_bias… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16() 141 const v128_t vxmask_hi = wasm_i32x4_gt(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16()
|
D | vcvt-avx-int32-x16.c | 93 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() local 96 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() 99 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() 104 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() 121 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() local 124 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() 127 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() 136 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16()
|
D | vcvt-sse41-int32-x16.c | 93 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() local 96 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() 99 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() 104 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() 121 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() local 124 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() 127 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() 136 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16()
|
D | vcvt-sse2-int32-x16.c | 97 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16() local 100 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16() 103 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16() 109 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16() 127 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16() local 130 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16() 133 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16() 143 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16()
|
D | vcvt-wasmsimd-int32-x24.c | 112 const v128_t vnonsign_hi = wasm_v128_xor(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24() local 115 …const v128_t vnorm_hi = wasm_f32x4_mul(wasm_i32x4_add(wasm_u32x4_shr(vnonsign_hi, 3), vexp_offset)… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24() 118 …const v128_t vdenorm_hi = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_hi, 16), vmagic_bias… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24() 121 const v128_t vxmask_hi = wasm_i32x4_gt(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24() 143 const v128_t vnonsign_hi = wasm_v128_xor(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24() local 146 …const v128_t vnorm_hi = wasm_f32x4_mul(wasm_i32x4_add(wasm_u32x4_shr(vnonsign_hi, 3), vexp_offset)… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24() 149 …const v128_t vdenorm_hi = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_hi, 16), vmagic_bias… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24() 158 const v128_t vxmask_hi = wasm_i32x4_gt(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24()
|
D | vcvt-sse41-int32-x24.c | 110 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24() local 113 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24() 116 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24() 121 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24() 138 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24() local 141 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24() 144 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24() 153 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24()
|
D | vcvt-neon-int32-x24.c | 107 const uint32x4_t vnonsign_hi = veorq_u32(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__neon_int32_x24() local 110 …_t vnorm_hi = vmulq_f32(vreinterpretq_f32_u32(vsraq_n_u32(vexp_offset, vnonsign_hi, 3)), vexp_scal… in xnn_f16_f32_vcvt_ukernel__neon_int32_x24() 113 … vdenorm_hi = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_hi, 16)), vreinter… in xnn_f16_f32_vcvt_ukernel__neon_int32_x24() 118 const uint32x4_t vxmask_hi = vcgtq_u32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__neon_int32_x24() 134 const uint32x4_t vnonsign_hi = veorq_u32(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__neon_int32_x24() local 137 …_t vnorm_hi = vmulq_f32(vreinterpretq_f32_u32(vsraq_n_u32(vexp_offset, vnonsign_hi, 3)), vexp_scal… in xnn_f16_f32_vcvt_ukernel__neon_int32_x24() 140 … vdenorm_hi = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_hi, 16)), vreinter… in xnn_f16_f32_vcvt_ukernel__neon_int32_x24() 148 const uint32x4_t vxmask_hi = vcgtq_u32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__neon_int32_x24()
|
D | vcvt-avx-int32-x24.c | 110 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__avx_int32_x24() local 113 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__avx_int32_x24() 116 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__avx_int32_x24() 121 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x24() 138 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__avx_int32_x24() local 141 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__avx_int32_x24() 144 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__avx_int32_x24() 153 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x24()
|
D | vcvt-sse2-int32-x24.c | 116 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24() local 119 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24() 122 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24() 128 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24() 146 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24() local 149 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24() 152 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24() 162 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24()
|
D | vcvt-neon-int32-x32.c | 124 const uint32x4_t vnonsign_hi = veorq_u32(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__neon_int32_x32() local 127 …_t vnorm_hi = vmulq_f32(vreinterpretq_f32_u32(vsraq_n_u32(vexp_offset, vnonsign_hi, 3)), vexp_scal… in xnn_f16_f32_vcvt_ukernel__neon_int32_x32() 130 … vdenorm_hi = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_hi, 16)), vreinter… in xnn_f16_f32_vcvt_ukernel__neon_int32_x32() 135 const uint32x4_t vxmask_hi = vcgtq_u32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__neon_int32_x32() 151 const uint32x4_t vnonsign_hi = veorq_u32(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__neon_int32_x32() local 154 …_t vnorm_hi = vmulq_f32(vreinterpretq_f32_u32(vsraq_n_u32(vexp_offset, vnonsign_hi, 3)), vexp_scal… in xnn_f16_f32_vcvt_ukernel__neon_int32_x32() 157 … vdenorm_hi = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_hi, 16)), vreinter… in xnn_f16_f32_vcvt_ukernel__neon_int32_x32() 165 const uint32x4_t vxmask_hi = vcgtq_u32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__neon_int32_x32()
|
D | vcvt-wasmsimd-int32-x32.c | 129 const v128_t vnonsign_hi = wasm_v128_xor(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32() local 132 …const v128_t vnorm_hi = wasm_f32x4_mul(wasm_i32x4_add(wasm_u32x4_shr(vnonsign_hi, 3), vexp_offset)… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32() 135 …const v128_t vdenorm_hi = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_hi, 16), vmagic_bias… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32() 138 const v128_t vxmask_hi = wasm_i32x4_gt(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32() 160 const v128_t vnonsign_hi = wasm_v128_xor(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32() local 163 …const v128_t vnorm_hi = wasm_f32x4_mul(wasm_i32x4_add(wasm_u32x4_shr(vnonsign_hi, 3), vexp_offset)… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32() 166 …const v128_t vdenorm_hi = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_hi, 16), vmagic_bias… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32() 175 const v128_t vxmask_hi = wasm_i32x4_gt(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32()
|
D | vcvt-avx-int32-x32.c | 127 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__avx_int32_x32() local 130 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__avx_int32_x32() 133 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__avx_int32_x32() 138 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x32() 155 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_f16_f32_vcvt_ukernel__avx_int32_x32() local 158 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_f16_f32_vcvt_ukernel__avx_int32_x32() 161 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… in xnn_f16_f32_vcvt_ukernel__avx_int32_x32() 170 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x32()
|
/external/XNNPACK/src/f16-f32-vcvt/ |
D | neon-int32.c.in | 76 const uint32x4_t vnonsign_hi = veorq_u32(vw_hi, vsign_hi); variable 79 …_t vnorm_hi = vmulq_f32(vreinterpretq_f32_u32(vsraq_n_u32(vexp_offset, vnonsign_hi, 3)), vexp_scal… 82 … vdenorm_hi = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_hi, 16)), vreinter… 87 const uint32x4_t vxmask_hi = vcgtq_u32(vnonsign_hi, vdenorm_cutoff); 103 const uint32x4_t vnonsign_hi = veorq_u32(vw_hi, vsign_hi); variable 106 …_t vnorm_hi = vmulq_f32(vreinterpretq_f32_u32(vsraq_n_u32(vexp_offset, vnonsign_hi, 3)), vexp_scal… 109 … vdenorm_hi = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_hi, 16)), vreinter… 117 const uint32x4_t vxmask_hi = vcgtq_u32(vnonsign_hi, vdenorm_cutoff);
|
D | wasmsimd-int32.c.in | 83 const v128_t vnonsign_hi = wasm_v128_xor(vw_hi, vsign_hi); variable 86 …const v128_t vnorm_hi = wasm_f32x4_mul(wasm_i32x4_add(wasm_u32x4_shr(vnonsign_hi, 3), vexp_offset)… 89 …const v128_t vdenorm_hi = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_hi, 16), vmagic_bias… 92 const v128_t vxmask_hi = wasm_i32x4_gt(vnonsign_hi, vdenorm_cutoff); 114 const v128_t vnonsign_hi = wasm_v128_xor(vw_hi, vsign_hi); variable 117 …const v128_t vnorm_hi = wasm_f32x4_mul(wasm_i32x4_add(wasm_u32x4_shr(vnonsign_hi, 3), vexp_offset)… 120 …const v128_t vdenorm_hi = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_hi, 16), vmagic_bias… 129 const v128_t vxmask_hi = wasm_i32x4_gt(vnonsign_hi, vdenorm_cutoff);
|
D | sse-int32.c.in | 89 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); 92 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… 95 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… 104 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); 125 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); 128 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… 131 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_bi… 144 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff);
|
/external/XNNPACK/src/math/ |
D | cvt-f16-f32-wasmsimd-int32.c | 42 const v128_t vnonsign_hi = wasm_v128_xor(vw_hi, vsign_hi); in xnn_math_f16_f32_cvt__wasmsimd_int32() local 45 …const v128_t vnorm_hi = wasm_f32x4_mul(wasm_i32x4_add(wasm_u32x4_shr(vnonsign_hi, 3), vexp_offset)… in xnn_math_f16_f32_cvt__wasmsimd_int32() 48 …const v128_t vdenorm_hi = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_hi, 16), vmagic_mask… in xnn_math_f16_f32_cvt__wasmsimd_int32() 51 const v128_t vmask_hi = wasm_i32x4_gt(vnonsign_hi, vdenorm_cutoff); in xnn_math_f16_f32_cvt__wasmsimd_int32()
|
D | cvt-f16-f32-sse41-int32.c | 41 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_math_f16_f32_cvt__sse41_int32() local 44 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_math_f16_f32_cvt__sse41_int32() 47 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_ma… in xnn_math_f16_f32_cvt__sse41_int32() 50 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_math_f16_f32_cvt__sse41_int32()
|
D | cvt-f16-f32-neon-int32.c | 40 const uint32x4_t vnonsign_hi = veorq_u32(vw_hi, vsign_hi); in xnn_math_f16_f32_cvt__neon_int32() local 43 …_t vnorm_hi = vmulq_f32(vreinterpretq_f32_u32(vsraq_n_u32(vexp_offset, vnonsign_hi, 3)), vexp_scal… in xnn_math_f16_f32_cvt__neon_int32() 46 … vdenorm_hi = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_mask, vnonsign_hi, 16)), vmagic_b… in xnn_math_f16_f32_cvt__neon_int32() 49 const uint32x4_t vmask_hi = vcgtq_u32(vnonsign_hi, vdenorm_cutoff); in xnn_math_f16_f32_cvt__neon_int32()
|
D | cvt-f16-f32-sse2-int32.c | 41 const __m128i vnonsign_hi = _mm_xor_si128(vw_hi, vsign_hi); in xnn_math_f16_f32_cvt__sse2_int32() local 44 …_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_add_epi32(_mm_srli_epi32(vnonsign_hi, 3), vexp_offse… in xnn_math_f16_f32_cvt__sse2_int32() 47 …m_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi32(vnonsign_hi, 16), vmagic_ma… in xnn_math_f16_f32_cvt__sse2_int32() 50 const __m128i vmask_hi = _mm_cmpgt_epi32(vnonsign_hi, vdenorm_cutoff); in xnn_math_f16_f32_cvt__sse2_int32()
|