Definition and uses of vsign2 in /external/XNNPACK/src/f16-f32-vcvt/gen/:
vcvt-wasmsimd-int16-x24.c (xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x24)
  45: const v128_t vsign2 = wasm_v128_and(vh2, vsign_mask);
  49: const v128_t vnonsign2 = wasm_v128_xor(vh2, vsign2);
  92: const v128_t vf4 = wasm_v128_or(wasm_v16x8_shuffle(vzero, vsign2, 0, 8, 1, 9, 2, 10, 3, 11),
  94: const v128_t vf5 = wasm_v128_or(wasm_v16x8_shuffle(vzero, vsign2, 4, 12, 5, 13, 6, 14, 7, 15),

vcvt-sse41-int16-x24.c (xnn_f16_f32_vcvt_ukernel__sse41_int16_x24)
  45: const __m128i vsign2 = _mm_and_si128(vh2, vsign_mask);
  49: const __m128i vnonsign2 = _mm_xor_si128(vh2, vsign2);
  84: const __m128i vf4 = _mm_or_si128(_mm_unpacklo_epi16(_mm_setzero_si128(), vsign2),
  86: const __m128i vf5 = _mm_or_si128(_mm_unpackhi_epi16(_mm_setzero_si128(), vsign2),

vcvt-avx-int16-x24.c (xnn_f16_f32_vcvt_ukernel__avx_int16_x24)
  45: const __m128i vsign2 = _mm_and_si128(vh2, vsign_mask);
  49: const __m128i vnonsign2 = _mm_xor_si128(vh2, vsign2);
  84: const __m128i vf4 = _mm_or_si128(_mm_unpacklo_epi16(_mm_setzero_si128(), vsign2),
  86: const __m128i vf5 = _mm_or_si128(_mm_unpackhi_epi16(_mm_setzero_si128(), vsign2),

vcvt-sse2-int16-x24.c (xnn_f16_f32_vcvt_ukernel__sse2_int16_x24)
  45: const __m128i vsign2 = _mm_and_si128(vh2, vsign_mask);
  49: const __m128i vnonsign2 = _mm_xor_si128(vh2, vsign2);
  89: const __m128i vf4 = _mm_or_si128(_mm_unpacklo_epi16(_mm_setzero_si128(), vsign2),
  92: const __m128i vf5 = _mm_or_si128(_mm_unpackhi_epi16(_mm_setzero_si128(), vsign2),

vcvt-neon-int16-x24.c (xnn_f16_f32_vcvt_ukernel__neon_int16_x24)
  43: const uint16x8_t vsign2 = vandq_u16(vh2, vsign_mask);
  47: const uint16x8_t vnonsign2 = veorq_u16(vh2, vsign2);
  78: const uint32x4_t vf4 = vorrq_u32(vshll_n_u16(vget_low_u16(vsign2), 16),
  88: const uint32x4_t vf5 = vorrq_u32(vshll_n_u16(vget_high_u16(vsign2), 16),

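Across the int16-path hits (the x24 files above and their x32 counterparts below), the 16-bit sign words extracted by the first hit are reinserted at bit 31 of each 32-bit output lane: SSE2/SSE4.1/AVX interleave them with zeros via _mm_unpacklo_epi16/_mm_unpackhi_epi16, WASM SIMD does the same interleave with wasm_v16x8_shuffle, and NEON widens-and-shifts with vshll_n_u16(..., 16). A minimal standalone SSE2 sketch of that interleave idiom (illustrative only, not XNNPACK code; the input values are made up):

    #include <emmintrin.h>  // SSE2
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      // Eight 16-bit sign words, as the kernels extract them with
      // _mm_and_si128(vh, vsign_mask): 0x8000 where the input was negative.
      const __m128i vsign = _mm_set_epi16(0, (short) 0x8000, 0, 0,
                                          (short) 0x8000, 0, (short) 0x8000, 0);
      // Interleaving zeros (low halves) with the sign words (high halves)
      // makes each 32-bit lane equal to (uint32_t) sign16 << 16, so the
      // fp16 sign bit lands in bit 31 of the fp32 result lane.
      const __m128i vlo = _mm_unpacklo_epi16(_mm_setzero_si128(), vsign);
      const __m128i vhi = _mm_unpackhi_epi16(_mm_setzero_si128(), vsign);

      uint32_t lo[4], hi[4];
      _mm_storeu_si128((__m128i*) lo, vlo);
      _mm_storeu_si128((__m128i*) hi, vhi);
      for (int i = 0; i < 4; i++) {
        printf("lo[%d]=0x%08" PRIX32 "  hi[%d]=0x%08" PRIX32 "\n", i, lo[i], i, hi[i]);
      }
      return 0;
    }
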
vcvt-wasmsimd-int16-x32.c (xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x32)
  46: const v128_t vsign2 = wasm_v128_and(vh2, vsign_mask);
  51: const v128_t vnonsign2 = wasm_v128_xor(vh2, vsign2);
  104: const v128_t vf4 = wasm_v128_or(wasm_v16x8_shuffle(vzero, vsign2, 0, 8, 1, 9, 2, 10, 3, 11),
  106: const v128_t vf5 = wasm_v128_or(wasm_v16x8_shuffle(vzero, vsign2, 4, 12, 5, 13, 6, 14, 7, 15),

vcvt-sse41-int16-x32.c (xnn_f16_f32_vcvt_ukernel__sse41_int16_x32)
  46: const __m128i vsign2 = _mm_and_si128(vh2, vsign_mask);
  51: const __m128i vnonsign2 = _mm_xor_si128(vh2, vsign2);
  94: const __m128i vf4 = _mm_or_si128(_mm_unpacklo_epi16(_mm_setzero_si128(), vsign2),
  96: const __m128i vf5 = _mm_or_si128(_mm_unpackhi_epi16(_mm_setzero_si128(), vsign2),

vcvt-avx-int16-x32.c (xnn_f16_f32_vcvt_ukernel__avx_int16_x32)
  46: const __m128i vsign2 = _mm_and_si128(vh2, vsign_mask);
  51: const __m128i vnonsign2 = _mm_xor_si128(vh2, vsign2);
  94: const __m128i vf4 = _mm_or_si128(_mm_unpacklo_epi16(_mm_setzero_si128(), vsign2),
  96: const __m128i vf5 = _mm_or_si128(_mm_unpackhi_epi16(_mm_setzero_si128(), vsign2),

vcvt-neon-int16-x32.c (xnn_f16_f32_vcvt_ukernel__neon_int16_x32)
  44: const uint16x8_t vsign2 = vandq_u16(vh2, vsign_mask);
  49: const uint16x8_t vnonsign2 = veorq_u16(vh2, vsign2);
  87: const uint32x4_t vf4 = vorrq_u32(vshll_n_u16(vget_low_u16(vsign2), 16),
  100: const uint32x4_t vf5 = vorrq_u32(vshll_n_u16(vget_high_u16(vsign2), 16),

vcvt-scalar-x3.c (xnn_f16_f32_vcvt_ukernel__scalar_x3)
  49: const uint32_t vsign2 = vw2 & vsign_mask;
  65: const uint32_t vf2 = vsign2 | (XNN_UNPREDICTABLE(v2w2 < vdenorm_cutoff) ? vdenorm2 : vnorm2);

vcvt-neon-int32-x16.c (xnn_f16_f32_vcvt_ukernel__neon_int32_x16)
  47: const uint32x4_t vsign2 = vandq_u32(vw2, vsign_mask);
  52: const uint32x4_t vnonsign2 = veorq_u32(vw2, vsign2);
  72: const uint32x4_t vf2 = vorrq_u32(vsign2, vreinterpretq_u32_f32(vbslq_f32(vxmask2, vnorm2, vdenorm2…

vcvt-wasmsimd-int32-x16.c (xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16)
  49: const v128_t vsign2 = wasm_v128_and(vw2, vsign_mask);
  54: const v128_t vnonsign2 = wasm_v128_xor(vw2, vsign2);
  74: const v128_t vf2 = wasm_v128_or(vsign2, wasm_v128_bitselect(vnorm2, vdenorm2, vxmask2));

vcvt-sse2-int16-x32.c (xnn_f16_f32_vcvt_ukernel__sse2_int16_x32)
  46: const __m128i vsign2 = _mm_and_si128(vh2, vsign_mask);
  51: const __m128i vnonsign2 = _mm_xor_si128(vh2, vsign2);
  99: const __m128i vf4 = _mm_or_si128(_mm_unpacklo_epi16(_mm_setzero_si128(), vsign2),
  102: const __m128i vf5 = _mm_or_si128(_mm_unpackhi_epi16(_mm_setzero_si128(), vsign2),

vcvt-scalar-x4.c (xnn_f16_f32_vcvt_ukernel__scalar_x4)
  51: const uint32_t vsign2 = vw2 & vsign_mask;
  71: const uint32_t vf2 = vsign2 | (XNN_UNPREDICTABLE(v2w2 < vdenorm_cutoff) ? vdenorm2 : vnorm2);

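The scalar hits (vcvt-scalar-x3.c above and vcvt-scalar-x4.c here) show the per-element shape of the algorithm: mask off the sign, convert the magnitude along both a normal and a denormal path, pick one by comparing against vdenorm_cutoff, and OR the sign back in. A single-element sketch of that structure follows; the helper names and the constants 0x70000000, 0x1.0p-112f, 0x3F000000, 0.5f, and 0x08000000 are my reconstruction of the usual fp16-to-fp32 bit tricks, not text copied from the generated files:

    #include <stdint.h>
    #include <string.h>

    static uint32_t float_as_u32(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
    static float u32_as_float(uint32_t u) { float f; memcpy(&f, &u, sizeof f); return f; }

    // One fp16 value (raw bits) to fp32, mirroring the scalar kernels' structure:
    // split off the sign, convert the magnitude twice, select, OR the sign back.
    float f16_to_f32_bits(uint16_t h) {
      const uint32_t vw = (uint32_t) h << 16;
      const uint32_t vsign = vw & UINT32_C(0x80000000);  // vsign_mask
      const uint32_t v2w = vw + vw;                      // shifts the sign bit out
      // Normal path (assumed constants): adding 0x70000000 puts 224 into the
      // fp32 exponent field; multiplying by 2^-112 takes 112 back off, for a
      // net rebias of +112 = (fp32 bias 127) - (fp16 bias 15).
      const uint32_t vnorm = float_as_u32(
          u32_as_float((v2w >> 4) + UINT32_C(0x70000000)) * 0x1.0p-112f);
      // Denormal path (assumed constants): glue the 10 mantissa bits onto 0.5f
      // and subtract 0.5f, leaving mantissa * 2^-24, the value of an fp16 denormal.
      const uint32_t vdenorm = float_as_u32(
          u32_as_float((v2w >> 17) | UINT32_C(0x3F000000)) - 0.5f);
      // In v2w the exponent field occupies bits 31..27, so values below 2^27
      // are denormal or zero; this comparison plays the role of vdenorm_cutoff.
      const uint32_t vf = vsign | (v2w < UINT32_C(0x08000000) ? vdenorm : vnorm);
      return u32_as_float(vf);
    }
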
vcvt-avx-int32-x16.c (xnn_f16_f32_vcvt_ukernel__avx_int32_x16)
  48: const __m128i vsign2 = _mm_and_si128(vw2, vsign_mask);
  53: const __m128i vnonsign2 = _mm_xor_si128(vw2, vsign2);
  73: const __m128i vf2 = _mm_or_si128(vsign2, _mm_blendv_epi8(vdenorm2, vnorm2, vmask2));

vcvt-sse41-int32-x16.c (xnn_f16_f32_vcvt_ukernel__sse41_int32_x16)
  48: const __m128i vsign2 = _mm_and_si128(vw2, vsign_mask);
  53: const __m128i vnonsign2 = _mm_xor_si128(vw2, vsign2);
  73: const __m128i vf2 = _mm_or_si128(vsign2, _mm_blendv_epi8(vdenorm2, vnorm2, vmask2));

vcvt-sse2-int32-x16.c (xnn_f16_f32_vcvt_ukernel__sse2_int32_x16)
  48: const __m128i vsign2 = _mm_and_si128(vw2, vsign_mask);
  53: const __m128i vnonsign2 = _mm_xor_si128(vw2, vsign2);
  75: const __m128i vf2 = _mm_or_si128(vsign2,

vcvt-wasmsimd-int32-x24.c (xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24)
  52: const v128_t vsign2 = wasm_v128_and(vw2, vsign_mask);
  59: const v128_t vnonsign2 = wasm_v128_xor(vw2, vsign2);
  87: const v128_t vf2 = wasm_v128_or(vsign2, wasm_v128_bitselect(vnorm2, vdenorm2, vxmask2));

vcvt-sse41-int32-x24.c (xnn_f16_f32_vcvt_ukernel__sse41_int32_x24)
  51: const __m128i vsign2 = _mm_and_si128(vw2, vsign_mask);
  58: const __m128i vnonsign2 = _mm_xor_si128(vw2, vsign2);
  86: const __m128i vf2 = _mm_or_si128(vsign2, _mm_blendv_epi8(vdenorm2, vnorm2, vmask2));

vcvt-neon-int32-x24.c (xnn_f16_f32_vcvt_ukernel__neon_int32_x24)
  50: const uint32x4_t vsign2 = vandq_u32(vw2, vsign_mask);
  57: const uint32x4_t vnonsign2 = veorq_u32(vw2, vsign2);
  85: const uint32x4_t vf2 = vorrq_u32(vsign2, vreinterpretq_u32_f32(vbslq_f32(vxmask2, vnorm2, vdenorm2…

vcvt-avx-int32-x24.c (xnn_f16_f32_vcvt_ukernel__avx_int32_x24)
  51: const __m128i vsign2 = _mm_and_si128(vw2, vsign_mask);
  58: const __m128i vnonsign2 = _mm_xor_si128(vw2, vsign2);
  86: const __m128i vf2 = _mm_or_si128(vsign2, _mm_blendv_epi8(vdenorm2, vnorm2, vmask2));

vcvt-sse2-int32-x24.c (xnn_f16_f32_vcvt_ukernel__sse2_int32_x24)
  51: const __m128i vsign2 = _mm_and_si128(vw2, vsign_mask);
  58: const __m128i vnonsign2 = _mm_xor_si128(vw2, vsign2);
  88: const __m128i vf2 = _mm_or_si128(vsign2,

vcvt-neon-int32-x32.c (xnn_f16_f32_vcvt_ukernel__neon_int32_x32)
  53: const uint32x4_t vsign2 = vandq_u32(vw2, vsign_mask);
  62: const uint32x4_t vnonsign2 = veorq_u32(vw2, vsign2);
  98: const uint32x4_t vf2 = vorrq_u32(vsign2, vreinterpretq_u32_f32(vbslq_f32(vxmask2, vnorm2, vdenorm2…

vcvt-wasmsimd-int32-x32.c (xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32)
  55: const v128_t vsign2 = wasm_v128_and(vw2, vsign_mask);
  64: const v128_t vnonsign2 = wasm_v128_xor(vw2, vsign2);
  100: const v128_t vf2 = wasm_v128_or(vsign2, wasm_v128_bitselect(vnorm2, vdenorm2, vxmask2));

vcvt-avx-int32-x32.c (xnn_f16_f32_vcvt_ukernel__avx_int32_x32)
  54: const __m128i vsign2 = _mm_and_si128(vw2, vsign_mask);
  63: const __m128i vnonsign2 = _mm_xor_si128(vw2, vsign2);
  99: const __m128i vf2 = _mm_or_si128(vsign2, _mm_blendv_epi8(vdenorm2, vnorm2, vmask2));

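In the int32-path kernels the sign bit never leaves position 31, so the final step is a per-lane select between the normal and denormal conversions followed by an OR with the saved sign: _mm_blendv_epi8 on SSE4.1/AVX, wasm_v128_bitselect on WASM SIMD, vbslq_f32 on NEON (the sse2-int32 hits are truncated before their select). A small standalone SSE4.1 sketch of that select-then-OR step (illustrative lane values, not XNNPACK code):

    #include <smmintrin.h>  // SSE4.1 for _mm_blendv_epi8
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      // Stand-ins for the per-lane results of the two conversion paths...
      const __m128i vnorm   = _mm_set_epi32(0x44800000, 0x3F800000, 0x41200000, 0x3E000000);
      const __m128i vdenorm = _mm_set_epi32(0x33800000, 0x34000000, 0x33000000, 0x337FE000);
      // ...an all-ones-per-lane mask marking the lanes that were normal...
      const __m128i vmask   = _mm_set_epi32(-1, -1, 0, -1);
      // ...and the saved sign bits (bit 31 per lane).
      const __m128i vsign   = _mm_set_epi32(0, (int) 0x80000000, 0, 0);

      // _mm_blendv_epi8 picks vnorm bytes where the mask bytes' high bit is
      // set, vdenorm elsewhere; OR-ing vsign back reassembles the fp32 bits.
      const __m128i vf = _mm_or_si128(vsign, _mm_blendv_epi8(vdenorm, vnorm, vmask));

      uint32_t out[4];
      _mm_storeu_si128((__m128i*) out, vf);
      for (int i = 0; i < 4; i++) printf("vf[%d]=0x%08" PRIX32 "\n", i, out[i]);
      return 0;
    }
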