/external/XNNPACK/src/f32-f16-vcvt/gen/ |
D | vcvt-f16c-x8.c | 47 _mm_storel_epi64((__m128i*) o, _mm_cvtps_ph(vf_lo, _MM_FROUND_NO_EXC)); in xnn_f32_f16_vcvt_ukernel__f16c_x8() 51 __m128i vh = _mm_cvtps_ph(vf_lo, _MM_FROUND_NO_EXC); in xnn_f32_f16_vcvt_ukernel__f16c_x8()
|
D | vcvt-f16c-x16.c | 56 _mm_storel_epi64((__m128i*) o, _mm_cvtps_ph(vf_lo, _MM_FROUND_NO_EXC)); in xnn_f32_f16_vcvt_ukernel__f16c_x16() 60 __m128i vh = _mm_cvtps_ph(vf_lo, _MM_FROUND_NO_EXC); in xnn_f32_f16_vcvt_ukernel__f16c_x16()
|
/external/XNNPACK/src/f32-f16-vcvt/ |
D | f16c.c.in | 58 _mm_storel_epi64((__m128i*) o, _mm_cvtps_ph(vf_lo, _MM_FROUND_NO_EXC)); 62 __m128i vh = _mm_cvtps_ph(vf_lo, _MM_FROUND_NO_EXC);
|
/external/clang/test/CodeGen/ |
D | f16c-builtins.c | 49 return _mm_cvtps_ph(a, 0); in test_mm_cvtps_ph()
|
/external/clang/lib/Headers/ |
D | f16cintrin.h | 102 #define _mm_cvtps_ph(a, imm) \ macro
|
/external/XNNPACK/src/f16-rmax/ |
D | f16c.c | 65 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vmax_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_rmax_ukernel__f16c()
|
/external/rust/crates/half/src/binary16/ |
D | convert.rs | 634 __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, _mm_cvtps_ph, 654 let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); in f32_to_f16_x86_f16c() 672 let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT);
|
/external/FP16/bench/ |
D | ieee-element.cc | 185 _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(fp32), _MM_FROUND_CUR_DIRECTION))); in fp16_ieee_from_fp32_hardware()
|
D | to-ieee-array.cc | 73 _mm_cvtps_ph(_mm_loadu_ps(&input[i]), _MM_FROUND_CUR_DIRECTION)); in hardware_mm_cvtps_ph()
|
/external/XNNPACK/src/f16-raddstoreexpminusmax/gen/ |
D | avx2-rr1-p2-x32-acc2.c | 175 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x32_acc2()
|
D | avx2-rr1-p2-x32-acc4.c | 179 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x32_acc4()
|
D | avx2-rr1-p2-x32.c | 173 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x32()
|
D | avx2-rr1-p2-x40.c | 185 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x40()
|
D | avx2-rr1-p2-x40-acc2.c | 187 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x40_acc2()
|
D | avx2-rr1-p2-x40-acc5.c | 193 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x40_acc5()
|
D | avx2-rr1-p2-x48-acc2.c | 199 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x48_acc2()
|
D | avx2-rr1-p2-x48-acc3.c | 201 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x48_acc3()
|
D | avx2-rr1-p2-x48.c | 197 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x48()
|
D | avx2-rr1-p2-x64-acc2.c | 223 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc2()
|
D | avx2-rr1-p2-x64-acc4.c | 227 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc4()
|
D | avx2-rr1-p2-x64.c | 221 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64()
|
D | avx2-rr1-p2-x72.c | 233 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72()
|
D | avx2-rr1-p2-x72-acc3.c | 237 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72_acc3()
|
D | avx2-rr1-p2-x80.c | 245 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80()
|
/external/XNNPACK/src/f16-raddstoreexpminusmax/ |
D | avx2-rr1-p2.c.in | 159 *((uint16_t*) sum) = (uint16_t) _mm_extract_epi16(_mm_cvtps_ph(vacc_lo, _MM_FROUND_NO_EXC), 0);
|