/external/XNNPACK/src/f16-f32-vcvt/gen/ |
D | vcvt-neon-int32-x8.c | 51 …const float32x4_t vdenorm_lo = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_l… in xnn_f16_f32_vcvt_ukernel__neon_int32_x8() local 55 …4_t vf_lo = vorrq_u32(vsign_lo, vreinterpretq_u32_f32(vbslq_f32(vxmask_lo, vnorm_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__neon_int32_x8() 78 …const float32x4_t vdenorm_lo = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_l… in xnn_f16_f32_vcvt_ukernel__neon_int32_x8() local 82 …32x4_t vf = vorrq_u32(vsign_lo, vreinterpretq_u32_f32(vbslq_f32(vxmask_lo, vnorm_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__neon_int32_x8()
|
D | vcvt-wasmsimd-int16-x8.c | 51 …const v128_t vdenorm_lo = wasm_f32x4_sub(wasm_v16x8_shuffle(vnonsign, vmagic_mask, 0, 8, 1, 9, 2… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x8() local 59 wasm_v128_bitselect(vnorm_lo, vdenorm_lo, vxmask_lo)); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x8() 84 …const v128_t vdenorm_lo = wasm_f32x4_sub(wasm_v16x8_shuffle(vnonsign, vmagic_mask, 0, 8, 1, 9, 2… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x8() local 92 wasm_v128_bitselect(vnorm_lo, vdenorm_lo, vxmask_lo)); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x8()
|
D | vcvt-neon-int16-x8.c | 47 …const float32x4_t vdenorm_lo = vsubq_f32(vreinterpretq_f32_u32(vaddw_u16(vmagic_bias, vget_low_u16… in xnn_f16_f32_vcvt_ukernel__neon_int16_x8() local 54 vreinterpretq_u32_f32(vbslq_f32(vxmask_lo, vnorm_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__neon_int16_x8() 74 …const float32x4_t vdenorm_lo = vsubq_f32(vreinterpretq_f32_u32(vaddw_u16(vmagic_bias, vget_low_u16… in xnn_f16_f32_vcvt_ukernel__neon_int16_x8() local 81 vreinterpretq_u32_f32(vbslq_f32(vxmask_lo, vnorm_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__neon_int16_x8()
|
D | vcvt-sse2-int16-x8.c | 51 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… in xnn_f16_f32_vcvt_ukernel__sse2_int16_x8() local 58 _mm_or_si128(_mm_and_si128(vxmask_lo, vnorm_lo), _mm_andnot_si128(vxmask_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__sse2_int16_x8() 81 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… in xnn_f16_f32_vcvt_ukernel__sse2_int16_x8() local 88 _mm_or_si128(_mm_and_si128(vxmask_lo, vnorm_lo), _mm_andnot_si128(vxmask_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__sse2_int16_x8()
|
D | vcvt-avx-int16-x8.c | 51 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… in xnn_f16_f32_vcvt_ukernel__avx_int16_x8() local 57 _mm_blendv_epi8(vdenorm_lo, vnorm_lo, _mm_cvtepi16_epi32(vmask))); in xnn_f16_f32_vcvt_ukernel__avx_int16_x8() 79 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… in xnn_f16_f32_vcvt_ukernel__avx_int16_x8() local 85 _mm_blendv_epi8(vdenorm_lo, vnorm_lo, _mm_cvtepi16_epi32(vmask))); in xnn_f16_f32_vcvt_ukernel__avx_int16_x8()
|
D | vcvt-sse41-int16-x8.c | 51 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x8() local 57 _mm_blendv_epi8(vdenorm_lo, vnorm_lo, _mm_cvtepi16_epi32(vmask))); in xnn_f16_f32_vcvt_ukernel__sse41_int16_x8() 79 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x8() local 85 _mm_blendv_epi8(vdenorm_lo, vnorm_lo, _mm_cvtepi16_epi32(vmask))); in xnn_f16_f32_vcvt_ukernel__sse41_int16_x8()
|
D | vcvt-wasmsimd-int32-x8.c | 53 …const v128_t vdenorm_lo = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_lo, 16), vmagic_bias… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8() local 59 … const v128_t vf_lo = wasm_v128_or(vsign_lo, wasm_v128_bitselect(vnorm_lo, vdenorm_lo, vxmask_lo)); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8() 84 …const v128_t vdenorm_lo = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_lo, 16), vmagic_bias… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8() local 88 v128_t vf = wasm_v128_or(vsign_lo, wasm_v128_bitselect(vnorm_lo, vdenorm_lo, vxmask_lo)); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8()
|
D | vcvt-avx-int32-x8.c | 52 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi3… in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() local 56 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() 80 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi3… in xnn_f16_f32_vcvt_ukernel__avx_int32_x8() local 84 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x8()
|
D | vcvt-sse41-int32-x8.c | 52 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi3… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() local 56 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() 80 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi3… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8() local 84 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x8()
|
D | vcvt-sse2-int32-x8.c | 52 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi3… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() local 57 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() 82 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi3… in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8() local 87 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__sse2_int32_x8()
|
D | vcvt-wasmsimd-int16-x16.c | 101 …const v128_t vdenorm_lo = wasm_f32x4_sub(wasm_v16x8_shuffle(vnonsign, vmagic_mask, 0, 8, 1, 9, 2… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x16() local 109 wasm_v128_bitselect(vnorm_lo, vdenorm_lo, vxmask_lo)); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x16() 134 …const v128_t vdenorm_lo = wasm_f32x4_sub(wasm_v16x8_shuffle(vnonsign, vmagic_mask, 0, 8, 1, 9, 2… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x16() local 142 wasm_v128_bitselect(vnorm_lo, vdenorm_lo, vxmask_lo)); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x16()
|
D | vcvt-sse41-int16-x16.c | 95 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x16() local 101 _mm_blendv_epi8(vdenorm_lo, vnorm_lo, _mm_cvtepi16_epi32(vmask))); in xnn_f16_f32_vcvt_ukernel__sse41_int16_x16() 123 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x16() local 129 _mm_blendv_epi8(vdenorm_lo, vnorm_lo, _mm_cvtepi16_epi32(vmask))); in xnn_f16_f32_vcvt_ukernel__sse41_int16_x16()
|
D | vcvt-neon-int32-x16.c | 95 …const float32x4_t vdenorm_lo = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_l… in xnn_f16_f32_vcvt_ukernel__neon_int32_x16() local 99 …4_t vf_lo = vorrq_u32(vsign_lo, vreinterpretq_u32_f32(vbslq_f32(vxmask_lo, vnorm_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__neon_int32_x16() 122 …const float32x4_t vdenorm_lo = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_l… in xnn_f16_f32_vcvt_ukernel__neon_int32_x16() local 126 …32x4_t vf = vorrq_u32(vsign_lo, vreinterpretq_u32_f32(vbslq_f32(vxmask_lo, vnorm_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__neon_int32_x16()
|
D | vcvt-avx-int16-x16.c | 95 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… in xnn_f16_f32_vcvt_ukernel__avx_int16_x16() local 101 _mm_blendv_epi8(vdenorm_lo, vnorm_lo, _mm_cvtepi16_epi32(vmask))); in xnn_f16_f32_vcvt_ukernel__avx_int16_x16() 123 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… in xnn_f16_f32_vcvt_ukernel__avx_int16_x16() local 129 _mm_blendv_epi8(vdenorm_lo, vnorm_lo, _mm_cvtepi16_epi32(vmask))); in xnn_f16_f32_vcvt_ukernel__avx_int16_x16()
|
D | vcvt-neon-int16-x16.c | 92 …const float32x4_t vdenorm_lo = vsubq_f32(vreinterpretq_f32_u32(vaddw_u16(vmagic_bias, vget_low_u16… in xnn_f16_f32_vcvt_ukernel__neon_int16_x16() local 99 vreinterpretq_u32_f32(vbslq_f32(vxmask_lo, vnorm_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__neon_int16_x16() 119 …const float32x4_t vdenorm_lo = vsubq_f32(vreinterpretq_f32_u32(vaddw_u16(vmagic_bias, vget_low_u16… in xnn_f16_f32_vcvt_ukernel__neon_int16_x16() local 126 vreinterpretq_u32_f32(vbslq_f32(vxmask_lo, vnorm_lo, vdenorm_lo))); in xnn_f16_f32_vcvt_ukernel__neon_int16_x16()
|
D | vcvt-wasmsimd-int32-x16.c | 100 …const v128_t vdenorm_lo = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_lo, 16), vmagic_bias… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16() local 106 … const v128_t vf_lo = wasm_v128_or(vsign_lo, wasm_v128_bitselect(vnorm_lo, vdenorm_lo, vxmask_lo)); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16() 131 …const v128_t vdenorm_lo = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_lo, 16), vmagic_bias… in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16() local 135 v128_t vf = wasm_v128_or(vsign_lo, wasm_v128_bitselect(vnorm_lo, vdenorm_lo, vxmask_lo)); in xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16()
|
D | vcvt-avx-int32-x16.c | 98 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi3… in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() local 102 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() 126 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi3… in xnn_f16_f32_vcvt_ukernel__avx_int32_x16() local 130 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__avx_int32_x16()
|
D | vcvt-sse41-int32-x16.c | 98 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi3… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() local 102 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() 126 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi3… in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16() local 130 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); in xnn_f16_f32_vcvt_ukernel__sse41_int32_x16()
|
/external/XNNPACK/src/f16-f32-vcvt/ |
D | sse-int32.c.in | 94 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi3… 99 const __m128i vf_lo = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); 102 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); 130 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_or_si128(_mm_srli_epi3… 135 __m128i vf = _mm_or_si128(vsign_lo, _mm_blendv_epi8(vdenorm_lo, vnorm_lo, vmask_lo)); 138 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo)));
|
D | sse-int16.c.in | 102 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… 109 _mm_blendv_epi8(vdenorm_lo, vnorm_lo, _mm_cvtepi16_epi32(vmask))); 113 _mm_or_si128(_mm_and_si128(vxmask_lo, vnorm_lo), _mm_andnot_si128(vxmask_lo, vdenorm_lo))); 140 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… 147 _mm_blendv_epi8(vdenorm_lo, vnorm_lo, _mm_cvtepi16_epi32(vmask))); 151 _mm_or_si128(_mm_and_si128(vxmask_lo, vnorm_lo), _mm_andnot_si128(vxmask_lo, vdenorm_lo)));
|
D | neon-int32.c.in | 81 …const float32x4_t vdenorm_lo = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_l… variable 85 …4_t vf_lo = vorrq_u32(vsign_lo, vreinterpretq_u32_f32(vbslq_f32(vxmask_lo, vnorm_lo, vdenorm_lo))); 108 …const float32x4_t vdenorm_lo = vsubq_f32(vreinterpretq_f32_u32(vsriq_n_u32(vmagic_bias, vnonsign_l… variable 112 …32x4_t vf = vorrq_u32(vsign_lo, vreinterpretq_u32_f32(vbslq_f32(vxmask_lo, vnorm_lo, vdenorm_lo)));
|
/external/XNNPACK/src/math/ |
D | cvt-f16-f32-wasmsimd-int16.c | 44 …const v128_t vdenorm_lo = wasm_f32x4_sub(wasm_v16x8_shuffle(vnonsign, vmagic_mask, 0, 8, 1, 9, 2… in xnn_math_f16_f32_cvt__wasmsimd_int16() local 53 wasm_v128_bitselect(vnorm_lo, vdenorm_lo, vmask_lo)); in xnn_math_f16_f32_cvt__wasmsimd_int16()
|
D | cvt-f16-f32-wasmsimd-int32.c | 47 …const v128_t vdenorm_lo = wasm_f32x4_sub(wasm_v128_or(wasm_u32x4_shr(vnonsign_lo, 16), vmagic_mask… in xnn_math_f16_f32_cvt__wasmsimd_int32() local 53 … const v128_t vf_lo = wasm_v128_or(vsign_lo, wasm_v128_bitselect(vnorm_lo, vdenorm_lo, vmask_lo)); in xnn_math_f16_f32_cvt__wasmsimd_int32()
|
D | cvt-f16-f32-sse41-int16.c | 44 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… in xnn_math_f16_f32_cvt__sse41_int16() local 49 _mm_blendv_epi8(vdenorm_lo, vnorm_lo, _mm_cvtepi16_epi32(vmask))); in xnn_math_f16_f32_cvt__sse41_int16()
|
D | cvt-f16-f32-sse2-int16.c | 44 …const __m128i vdenorm_lo = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsig… in xnn_math_f16_f32_cvt__sse2_int16() local 52 _mm_or_si128(_mm_and_si128(vmask_lo, vnorm_lo), _mm_andnot_si128(vmask_lo, vdenorm_lo))); in xnn_math_f16_f32_cvt__sse2_int16()
|