/external/XNNPACK/src/f16-f32-vcvt/gen/ |
D | vcvt-sse2-int32-x8.c | 55 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() local 57 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() 85 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() local 87 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8()
|
D | vcvt-sse2-int32-x16.c | 105 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16() local 107 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16() 135 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16() local 137 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16()
|
D | vcvt-avx-int32-x8.c | 55 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() local 56 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() 83 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() local 84 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x8()
|
D | vcvt-sse41-int32-x8.c | 55 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() local 56 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() 83 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() local 84 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8()
|
D | vcvt-sse2-int32-x24.c | 124 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24() local 126 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24() 154 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24() local 156 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24()
|
D | vcvt-sse2-int32-x32.c | 143 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x32() local 145 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x32() 173 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x32() local 175 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x32()
|
D | vcvt-avx-int32-x16.c | 101 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() local 102 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() 129 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() local 130 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16()
|
D | vcvt-sse41-int32-x16.c | 101 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() local 102 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() 129 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() local 130 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16()
|
D | vcvt-sse41-int32-x24.c | 118 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24() local 119 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24() 146 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24() local 147 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24()
|
D | vcvt-avx-int32-x24.c | 118 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x24() local 119 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x24() 146 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x24() local 147 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x24()
|
D | vcvt-avx-int32-x32.c | 135 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x32() local 136 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x32() 163 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__avx_int32_x32() local 164 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x32()
|
D | vcvt-sse41-int32-x32.c | 135 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x32() local 136 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x32() 163 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x32() local 164 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x32()
|
/external/XNNPACK/src/f16-f32-vcvt/ |
D | sse-int32.c.in | 97 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); 99 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); 102 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); 133 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); 135 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); 138 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo)));
|
/external/XNNPACK/src/math/ |
D | cvt-f16-f32-sse2-int16.c | 48 const __m128i vmask_lo = _mm_unpacklo_epi16(vmask, vmask); in xnn_math_f16_f32_cvt__sse2_int16() local 52 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_math_f16_f32_cvt__sse2_int16()
|
D | cvt-f16-f32-sse2-int32.c | 49 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_math_f16_f32_cvt__sse2_int32() local 53 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_math_f16_f32_cvt__sse2_int32()
|
D | cvt-f16-f32-wasmsimd-int16.c | 48 const v128_t vmask_lo = wasm_i32x4_extend_low_i16x8(vmask); in xnn_math_f16_f32_cvt__wasmsimd_int16() local 53 wasm_v128_bitselect(vnorm_lo, vdenorm_lo, vmask_lo)); in xnn_math_f16_f32_cvt__wasmsimd_int16()
|
D | cvt-f16-f32-wasmsimd-int32.c | 50 const v128_t vmask_lo = wasm_i32x4_gt(vnonsign_lo, vdenorm_cutoff); in xnn_math_f16_f32_cvt__wasmsimd_int32() local 53 … const v128_t vf_lo = wasm_v128_or(vsign_lo, wasm_v128_bitselect(vnorm_lo, vdenorm_lo, vmask_lo)); in xnn_math_f16_f32_cvt__wasmsimd_int32()
|
D | cvt-f16-f32-sse41-int32.c | 49 const __m128i vmask_lo = _mm_cmpgt_epi32(vnonsign_lo, vdenorm_cutoff); in xnn_math_f16_f32_cvt__sse41_int32() local 53 _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_math_f16_f32_cvt__sse41_int32()
|
D | cvt-f16-f32-neon-int32.c | 48 const uint32x4_t vmask_lo = vcgtq_u32(vnonsign_lo, vdenorm_cutoff); in xnn_math_f16_f32_cvt__neon_int32() local 51 …const uint32x4_t vf_lo = vorrq_u32(vsign_lo, vreinterpretq_u32_f32(vbslq_f32(vmask_lo, vnorm_lo, v… in xnn_math_f16_f32_cvt__neon_int32()
|
D | cvt-f16-f32-neon-int16.c | 45 …const uint32x4_t vmask_lo = vreinterpretq_u32_s32(vmovl_s16(vreinterpret_s16_u16(vget_low_u16(vmas… in xnn_math_f16_f32_cvt__neon_int16() local 49 vreinterpretq_u32_f32(vbslq_f32(vmask_lo, vnorm_lo, vdenorm_lo))); in xnn_math_f16_f32_cvt__neon_int16()
|