/external/XNNPACK/src/f16-f32-vcvt/gen/ |
D | vcvt-wasmsimd-int16-x32.c | 47 const v128_t vsign3 = wasm_v128_and(vh3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x32() local 52 const v128_t vnonsign3 = wasm_v128_xor(vh3, vsign3); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x32() 108 const v128_t vf6 = wasm_v128_or(wasm_v16x8_shuffle(vzero, vsign3, 0, 8, 1, 9, 2, 10, 3, 11), in xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x32() 110 const v128_t vf7 = wasm_v128_or(wasm_v16x8_shuffle(vzero, vsign3, 4, 12, 5, 13, 6, 14, 7, 15), in xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x32()
|
D | vcvt-sse41-int16-x32.c | 47 const __m128i vsign3 = _mm_and_si128(vh3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32() local 52 const __m128i vnonsign3 = _mm_xor_si128(vh3, vsign3); in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32() 98 const __m128i vf6 = _mm_or_si128(_mm_unpacklo_epi16(_mm_setzero_si128(), vsign3), in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32() 100 const __m128i vf7 = _mm_or_si128(_mm_unpackhi_epi16(_mm_setzero_si128(), vsign3), in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
|
D | vcvt-avx-int16-x32.c | 47 const __m128i vsign3 = _mm_and_si128(vh3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__avx_int16_x32() local 52 const __m128i vnonsign3 = _mm_xor_si128(vh3, vsign3); in xnn_f16_f32_vcvt_ukernel__avx_int16_x32() 98 const __m128i vf6 = _mm_or_si128(_mm_unpacklo_epi16(_mm_setzero_si128(), vsign3), in xnn_f16_f32_vcvt_ukernel__avx_int16_x32() 100 const __m128i vf7 = _mm_or_si128(_mm_unpackhi_epi16(_mm_setzero_si128(), vsign3), in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
|
D | vcvt-neon-int16-x32.c | 45 const uint16x8_t vsign3 = vandq_u16(vh3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__neon_int16_x32() local 50 const uint16x8_t vnonsign3 = veorq_u16(vh3, vsign3); in xnn_f16_f32_vcvt_ukernel__neon_int16_x32() 90 const uint32x4_t vf6 = vorrq_u32(vshll_n_u16(vget_low_u16(vsign3), 16), in xnn_f16_f32_vcvt_ukernel__neon_int16_x32() 103 const uint32x4_t vf7 = vorrq_u32(vshll_n_u16(vget_high_u16(vsign3), 16), in xnn_f16_f32_vcvt_ukernel__neon_int16_x32()
|
D | vcvt-neon-int32-x16.c | 48 const uint32x4_t vsign3 = vandq_u32(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__neon_int32_x16() local 53 const uint32x4_t vnonsign3 = veorq_u32(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__neon_int32_x16() 73 …const uint32x4_t vf3 = vorrq_u32(vsign3, vreinterpretq_u32_f32(vbslq_f32(vxmask3, vnorm3, vdenorm3… in xnn_f16_f32_vcvt_ukernel__neon_int32_x16()
|
D | vcvt-wasmsimd-int32-x16.c | 50 const v128_t vsign3 = wasm_v128_and(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16() local 55 const v128_t vnonsign3 = wasm_v128_xor(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16() 75 const v128_t vf3 = wasm_v128_or(vsign3, wasm_v128_bitselect(vnorm3, vdenorm3, vxmask3)); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16()
|
D | vcvt-sse2-int16-x32.c | 47 const __m128i vsign3 = _mm_and_si128(vh3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__sse2_int16_x32() local 52 const __m128i vnonsign3 = _mm_xor_si128(vh3, vsign3); in xnn_f16_f32_vcvt_ukernel__sse2_int16_x32() 105 const __m128i vf6 = _mm_or_si128(_mm_unpacklo_epi16(_mm_setzero_si128(), vsign3), in xnn_f16_f32_vcvt_ukernel__sse2_int16_x32() 108 const __m128i vf7 = _mm_or_si128(_mm_unpackhi_epi16(_mm_setzero_si128(), vsign3), in xnn_f16_f32_vcvt_ukernel__sse2_int16_x32()
|
D | vcvt-scalar-x4.c | 52 const uint32_t vsign3 = vw3 & vsign_mask; in xnn_f16_f32_vcvt_ukernel__scalar_x4() local 72 const uint32_t vf3 = vsign3 | (XNN_UNPREDICTABLE(v2w3 < vdenorm_cutoff) ? vdenorm3 : vnorm3); in xnn_f16_f32_vcvt_ukernel__scalar_x4()
|
D | vcvt-avx-int32-x16.c | 49 const __m128i vsign3 = _mm_and_si128(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() local 54 const __m128i vnonsign3 = _mm_xor_si128(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() 74 const __m128i vf3 = _mm_or_si128(vsign3, _mm_blendv_epi8(vdenorm3, vnorm3, vmask3)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16()
|
D | vcvt-sse41-int32-x16.c | 49 const __m128i vsign3 = _mm_and_si128(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() local 54 const __m128i vnonsign3 = _mm_xor_si128(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() 74 const __m128i vf3 = _mm_or_si128(vsign3, _mm_blendv_epi8(vdenorm3, vnorm3, vmask3)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16()
|
D | vcvt-sse2-int32-x16.c | 49 const __m128i vsign3 = _mm_and_si128(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16() local 54 const __m128i vnonsign3 = _mm_xor_si128(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16() 77 const __m128i vf3 = _mm_or_si128(vsign3, in xnn_f16_f32_vcvt_ukernel__sse2_int32_x16()
|
D | vcvt-wasmsimd-int32-x24.c | 53 const v128_t vsign3 = wasm_v128_and(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24() local 60 const v128_t vnonsign3 = wasm_v128_xor(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24() 88 const v128_t vf3 = wasm_v128_or(vsign3, wasm_v128_bitselect(vnorm3, vdenorm3, vxmask3)); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24()
|
D | vcvt-sse41-int32-x24.c | 52 const __m128i vsign3 = _mm_and_si128(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24() local 59 const __m128i vnonsign3 = _mm_xor_si128(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24() 87 const __m128i vf3 = _mm_or_si128(vsign3, _mm_blendv_epi8(vdenorm3, vnorm3, vmask3)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x24()
|
D | vcvt-neon-int32-x24.c | 51 const uint32x4_t vsign3 = vandq_u32(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__neon_int32_x24() local 58 const uint32x4_t vnonsign3 = veorq_u32(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__neon_int32_x24() 86 …const uint32x4_t vf3 = vorrq_u32(vsign3, vreinterpretq_u32_f32(vbslq_f32(vxmask3, vnorm3, vdenorm3… in xnn_f16_f32_vcvt_ukernel__neon_int32_x24()
|
D | vcvt-avx-int32-x24.c | 52 const __m128i vsign3 = _mm_and_si128(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__avx_int32_x24() local 59 const __m128i vnonsign3 = _mm_xor_si128(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__avx_int32_x24() 87 const __m128i vf3 = _mm_or_si128(vsign3, _mm_blendv_epi8(vdenorm3, vnorm3, vmask3)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x24()
|
D | vcvt-sse2-int32-x24.c | 52 const __m128i vsign3 = _mm_and_si128(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24() local 59 const __m128i vnonsign3 = _mm_xor_si128(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24() 90 const __m128i vf3 = _mm_or_si128(vsign3, in xnn_f16_f32_vcvt_ukernel__sse2_int32_x24()
|
D | vcvt-neon-int32-x32.c | 54 const uint32x4_t vsign3 = vandq_u32(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__neon_int32_x32() local 63 const uint32x4_t vnonsign3 = veorq_u32(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__neon_int32_x32() 99 …const uint32x4_t vf3 = vorrq_u32(vsign3, vreinterpretq_u32_f32(vbslq_f32(vxmask3, vnorm3, vdenorm3… in xnn_f16_f32_vcvt_ukernel__neon_int32_x32()
|
D | vcvt-wasmsimd-int32-x32.c | 56 const v128_t vsign3 = wasm_v128_and(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32() local 65 const v128_t vnonsign3 = wasm_v128_xor(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32() 101 const v128_t vf3 = wasm_v128_or(vsign3, wasm_v128_bitselect(vnorm3, vdenorm3, vxmask3)); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32()
|
D | vcvt-avx-int32-x32.c | 55 const __m128i vsign3 = _mm_and_si128(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__avx_int32_x32() local 64 const __m128i vnonsign3 = _mm_xor_si128(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__avx_int32_x32() 100 const __m128i vf3 = _mm_or_si128(vsign3, _mm_blendv_epi8(vdenorm3, vnorm3, vmask3)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x32()
|
D | vcvt-sse41-int32-x32.c | 55 const __m128i vsign3 = _mm_and_si128(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x32() local 64 const __m128i vnonsign3 = _mm_xor_si128(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x32() 100 const __m128i vf3 = _mm_or_si128(vsign3, _mm_blendv_epi8(vdenorm3, vnorm3, vmask3)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x32()
|
D | vcvt-sse2-int32-x32.c | 55 const __m128i vsign3 = _mm_and_si128(vw3, vsign_mask); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x32() local 64 const __m128i vnonsign3 = _mm_xor_si128(vw3, vsign3); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x32() 103 const __m128i vf3 = _mm_or_si128(vsign3, in xnn_f16_f32_vcvt_ukernel__sse2_int32_x32()
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx512f-rr1-lut16-p3-perm-x64.c | 111 const __mmask16 vsign3 = _mm512_cmp_ps_mask(vx3, vzero, _CMP_NLT_US); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64() local 116 vy3 = _mm512_mask_mul_ps(vy3, vsign3, vx3, vbeta); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64()
|
D | velu-avx512f-rr1-p6-x64.c | 119 const __mmask16 vsign3 = _mm512_cmp_ps_mask(vx3, vzero, _CMP_NLT_US); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64() local 124 vy3 = _mm512_mask_mul_ps(vy3, vsign3, vx3, vbeta); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64()
|
D | velu-avx512f-rr1-lut16-p3-perm-x80.c | 124 const __mmask16 vsign3 = _mm512_cmp_ps_mask(vx3, vzero, _CMP_NLT_US); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80() local 131 vy3 = _mm512_mask_mul_ps(vy3, vsign3, vx3, vbeta); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80()
|
D | velu-avx512f-rr1-p6-x80.c | 133 const __mmask16 vsign3 = _mm512_cmp_ps_mask(vx3, vzero, _CMP_NLT_US); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80() local 140 vy3 = _mm512_mask_mul_ps(vy3, vsign3, vx3, vbeta); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
|