/external/XNNPACK/src/f32-vrnd/
D | vrndu-neon.c.in
  49  uint32x4_t vadjmask${ABC[N:N+4]} = vcgeq_f32(vrndx${ABC[N:N+4]}, vx${ABC[N:N+4]});
  55  vadjmask${ABC[N:N+4]} = vorrq_u32(vadjmask${ABC[N:N+4]}, vmovq_n_u32(UINT32_C(0x80000000)));
  58  …const float32x4_t vy${ABC[N:N+4]} = vbslq_f32(vadjmask${ABC[N:N+4]}, vrndx${ABC[N:N+4]}, vadjrndx$…
  71  uint32x4_t vadjmask = vcgeq_f32(vrndx, vx);  variable
  73  vadjmask = vorrq_u32(vadjmask, vmovq_n_u32(UINT32_C(0x80000000)));
  74  const float32x4_t vy = vbslq_f32(vadjmask, vrndx, vadjrndx);
  84  uint32x4_t vadjmask = vcgeq_f32(vrndx, vx);  variable
  86  vadjmask = vorrq_u32(vadjmask, vmovq_n_u32(UINT32_C(0x80000000)));
  87  const float32x4_t vy = vbslq_f32(vadjmask, vrndx, vadjrndx);

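The vrndu (round-up) kernels in this directory share one adjustment pattern: compare the rounded value vrndx with the input vx, add 1.0f in lanes where rounding lowered the value, and blend. OR-ing 0x80000000 into the blend mask forces the result's sign bit to come from vrndx, presumably so that negative inputs that round up to zero keep their sign. A minimal NEON sketch of that step, assuming vadjrndx = vrndx + 1.0f (its definition is not among the matches):

#include <arm_neon.h>
#include <stdint.h>

// Hypothetical helper; assumes vrndx already holds vx rounded to an integral value.
static float32x4_t vrndu_adjust(float32x4_t vx, float32x4_t vrndx) {
  // Lanes where rounding did not decrease the value need no adjustment.
  uint32x4_t vadjmask = vcgeq_f32(vrndx, vx);
  // Candidate adjusted value: assumed to be vrndx + 1.0f (not shown in the matches above).
  const float32x4_t vadjrndx = vaddq_f32(vrndx, vmovq_n_f32(1.0f));
  // Force the mask's sign bit so the result's sign bit is always taken from vrndx.
  vadjmask = vorrq_u32(vadjmask, vmovq_n_u32(UINT32_C(0x80000000)));
  // Bit-select: vrndx where mask bits are set, vadjrndx elsewhere.
  return vbslq_f32(vadjmask, vrndx, vadjrndx);
}
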
D | vrndu-sse2.c.in
  48  …const __m128 vadjmask${ABC[N:N+4]} = _mm_or_ps(_mm_cmpge_ps(vrndx${ABC[N:N+4]}, vx${ABC[N:N+4]}), …
  54  …C[N:N+4]} = _mm_or_ps(_mm_and_ps(vrndx${ABC[N:N+4]}, vadjmask${ABC[N:N+4]}), _mm_andnot_ps(vadjmas…
  70  const __m128 vadjmask = _mm_or_ps(_mm_cmpge_ps(vrndx, vx), _mm_castsi128_ps(vmagic));  variable
  72  const __m128 vy = _mm_or_ps(_mm_and_ps(vrndx, vadjmask), _mm_andnot_ps(vadjmask, vadjrndx));
  83  const __m128 vadjmask = _mm_or_ps(_mm_cmpge_ps(vrndx, vx), _mm_castsi128_ps(vmagic));  variable
  85  __m128 vy = _mm_or_ps(_mm_and_ps(vrndx, vadjmask), _mm_andnot_ps(vadjmask, vadjrndx));

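SSE2 has no variable blend instruction, so the final select in vrndu-sse2.c.in is built from AND/ANDNOT/OR. A small sketch of that idiom (the helper name is hypothetical):

#include <emmintrin.h>

// Returns the bits of va where vmask is all-ones and the bits of vb where vmask is all-zeros,
// i.e. the same selection that vbslq_f32 / wasm_v128_bitselect express in one operation.
static __m128 sse2_blend(__m128 vmask, __m128 va, __m128 vb) {
  return _mm_or_ps(_mm_and_ps(va, vmask), _mm_andnot_ps(vmask, vb));
}
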
D | vrndu-wasmsimd-addsub.c.in
  50  …const v128_t vadjmask${ABC[N:N+4]} = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx${ABC[N:N+4]}, vrndx…
  56  …BC[N:N+4]} = wasm_v128_bitselect(vrndx${ABC[N:N+4]}, vadjrndx${ABC[N:N+4]}, vadjmask${ABC[N:N+4]});
  71  const v128_t vadjmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx, vrndx));  variable
  73  const v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);
  85  const v128_t vadjmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx, vrndx));  variable
  87  v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);

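The "addsub" in this file name presumably refers to the classic magic-number rounding used to produce vrndx before the adjustment step: adding and then subtracting 2**23 makes the hardware round away the fraction bits. A minimal sketch under that assumption; it covers only non-negative inputs below 2**23, and the sign and large-magnitude handling of the real kernel are not shown in these matches:

#include <wasm_simd128.h>

// Illustrative only: round non-negative x < 2**23 to an integral value via add/sub of 2**23.
static v128_t round_addsub_nonnegative(v128_t vx) {
  const v128_t vmagic = wasm_f32x4_splat(0x1.0p+23f);
  // Adding 2**23 pushes the fractional bits out of the mantissa, so the add itself rounds;
  // subtracting 2**23 back leaves the rounded-to-integer value.
  return wasm_f32x4_sub(wasm_f32x4_add(vx, vmagic), vmagic);
}
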
D | vrndu-wasmsimd-cvt.c.in
  49  …const v128_t vadjmask${ABC[N:N+4]} = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx${ABC[N:N+4]}, vrndx…
  55  …BC[N:N+4]} = wasm_v128_bitselect(vrndx${ABC[N:N+4]}, vadjrndx${ABC[N:N+4]}, vadjmask${ABC[N:N+4]});
  71  const v128_t vadjmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx, vrndx));  variable
  73  const v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);
  86  const v128_t vadjmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx, vrndx));  variable
  88  v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);

D | vrndd-neon.c.in
  49  const uint32x4_t vadjmask${ABC[N:N+4]} = vcgtq_f32(vrndx${ABC[N:N+4]}, vx${ABC[N:N+4]});
  52  …const float32x4_t vadjrndx${ABC[N:N+4]} = vreinterpretq_f32_u32(vandq_u32(vadjmask${ABC[N:N+4]}, v…
  68  const uint32x4_t vadjmask = vcgtq_f32(vrndx, vx);  variable
  69  const float32x4_t vadjrndx = vreinterpretq_f32_u32(vandq_u32(vadjmask, vone));
  80  const uint32x4_t vadjmask = vcgtq_f32(vrndx, vx);  variable
  81  const float32x4_t vadjrndx = vreinterpretq_f32_u32(vandq_u32(vadjmask, vone));

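The vrndd (round-down) template flips the comparison: lanes where vrndx overshoots vx get a 1.0f correction subtracted. Only the mask and the masked constant appear in the matches above, so the value of vone and the final subtraction in this sketch are assumptions:

#include <arm_neon.h>
#include <stdint.h>

// Hypothetical helper; assumes vrndx already holds vx rounded to an integral value.
static float32x4_t vrndd_adjust(float32x4_t vx, float32x4_t vrndx) {
  // Assumed: vone holds the bit pattern of 1.0f in every lane.
  const uint32x4_t vone = vmovq_n_u32(UINT32_C(0x3F800000));
  // Lanes where rounding overshot the input need a -1.0f correction.
  const uint32x4_t vadjmask = vcgtq_f32(vrndx, vx);
  // Per-lane correction: 1.0f where the mask is all-ones, 0.0f elsewhere.
  const float32x4_t vadjrndx = vreinterpretq_f32_u32(vandq_u32(vadjmask, vone));
  // Assumed final step: subtract the correction.
  return vsubq_f32(vrndx, vadjrndx);
}
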
/external/XNNPACK/src/f32-vrnd/gen/
D | vrndu-neon-x8.c
  71  uint32x4_t vadjmask = vcgeq_f32(vrndx, vx);  in xnn_f32_vrndu_ukernel__neon_x8() local
  73  vadjmask = vorrq_u32(vadjmask, vmovq_n_u32(UINT32_C(0x80000000)));  in xnn_f32_vrndu_ukernel__neon_x8()
  74  const float32x4_t vy = vbslq_f32(vadjmask, vrndx, vadjrndx);  in xnn_f32_vrndu_ukernel__neon_x8()
  84  uint32x4_t vadjmask = vcgeq_f32(vrndx, vx);  in xnn_f32_vrndu_ukernel__neon_x8() local
  86  vadjmask = vorrq_u32(vadjmask, vmovq_n_u32(UINT32_C(0x80000000)));  in xnn_f32_vrndu_ukernel__neon_x8()
  87  const float32x4_t vy = vbslq_f32(vadjmask, vrndx, vadjrndx);  in xnn_f32_vrndu_ukernel__neon_x8()

D | vrndu-sse2-x8.c
  68  const __m128 vadjmask = _mm_or_ps(_mm_cmpge_ps(vrndx, vx), _mm_castsi128_ps(vmagic));  in xnn_f32_vrndu_ukernel__sse2_x8() local
  70  const __m128 vy = _mm_or_ps(_mm_and_ps(vrndx, vadjmask), _mm_andnot_ps(vadjmask, vadjrndx));  in xnn_f32_vrndu_ukernel__sse2_x8()
  81  const __m128 vadjmask = _mm_or_ps(_mm_cmpge_ps(vrndx, vx), _mm_castsi128_ps(vmagic));  in xnn_f32_vrndu_ukernel__sse2_x8() local
  83  __m128 vy = _mm_or_ps(_mm_and_ps(vrndx, vadjmask), _mm_andnot_ps(vadjmask, vadjrndx));  in xnn_f32_vrndu_ukernel__sse2_x8()

D | vrndu-wasmsimd-addsub-x4.c
  39  const v128_t vadjmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx, vrndx));  in xnn_f32_vrndu_ukernel__wasmsimd_addsub_x4() local
  41  const v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);  in xnn_f32_vrndu_ukernel__wasmsimd_addsub_x4()
  53  const v128_t vadjmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx, vrndx));  in xnn_f32_vrndu_ukernel__wasmsimd_addsub_x4() local
  55  v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);  in xnn_f32_vrndu_ukernel__wasmsimd_addsub_x4()

D | vrndu-wasmsimd-cvt-x4.c
  40  const v128_t vadjmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx, vrndx));  in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x4() local
  42  const v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);  in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x4()
  55  const v128_t vadjmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx, vrndx));  in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x4() local
  57  v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);  in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x4()

D | vrndu-neon-x4.c
  60  uint32x4_t vadjmask = vcgeq_f32(vrndx, vx);  in xnn_f32_vrndu_ukernel__neon_x4() local
  62  vadjmask = vorrq_u32(vadjmask, vmovq_n_u32(UINT32_C(0x80000000)));  in xnn_f32_vrndu_ukernel__neon_x4()
  63  const float32x4_t vy = vbslq_f32(vadjmask, vrndx, vadjrndx);  in xnn_f32_vrndu_ukernel__neon_x4()

D | vrndu-wasmsimd-cvt-x8.c
  71  const v128_t vadjmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx, vrndx));  in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x8() local
  73  const v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);  in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x8()
  86  const v128_t vadjmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx, vrndx));  in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x8() local
  88  v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);  in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x8()

D | vrndd-neon-x8.c
  68  const uint32x4_t vadjmask = vcgtq_f32(vrndx, vx);  in xnn_f32_vrndd_ukernel__neon_x8() local
  69  const float32x4_t vadjrndx = vreinterpretq_f32_u32(vandq_u32(vadjmask, vone));  in xnn_f32_vrndd_ukernel__neon_x8()
  80  const uint32x4_t vadjmask = vcgtq_f32(vrndx, vx);  in xnn_f32_vrndd_ukernel__neon_x8() local
  81  const float32x4_t vadjrndx = vreinterpretq_f32_u32(vandq_u32(vadjmask, vone));  in xnn_f32_vrndd_ukernel__neon_x8()

D | vrndu-wasmsimd-addsub-x8.c
  69  const v128_t vadjmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx, vrndx));  in xnn_f32_vrndu_ukernel__wasmsimd_addsub_x8() local
  71  const v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);  in xnn_f32_vrndu_ukernel__wasmsimd_addsub_x8()
  83  const v128_t vadjmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx, vrndx));  in xnn_f32_vrndu_ukernel__wasmsimd_addsub_x8() local
  85  v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);  in xnn_f32_vrndu_ukernel__wasmsimd_addsub_x8()

D | vrndu-sse2-x4.c
  57  const __m128 vadjmask = _mm_or_ps(_mm_cmpge_ps(vrndx, vx), _mm_castsi128_ps(vmagic));  in xnn_f32_vrndu_ukernel__sse2_x4() local
  59  __m128 vy = _mm_or_ps(_mm_and_ps(vrndx, vadjmask), _mm_andnot_ps(vadjmask, vadjrndx));  in xnn_f32_vrndu_ukernel__sse2_x4()

D | vrndd-neon-x4.c
  58  const uint32x4_t vadjmask = vcgtq_f32(vrndx, vx);  in xnn_f32_vrndd_ukernel__neon_x4() local
  59  const float32x4_t vadjrndx = vreinterpretq_f32_u32(vandq_u32(vadjmask, vone));  in xnn_f32_vrndd_ukernel__neon_x4()

/external/XNNPACK/src/math/
D | roundu-sse2-cvt.c
  54  const __m128 vadjmask = _mm_or_ps(_mm_cmpge_ps(vrndx, vx), _mm_castsi128_ps(vmagic));  in xnn_math_f32_roundu__sse2_cvt() local
  67  const __m128 vy = _mm_or_ps(_mm_and_ps(vrndx, vadjmask), _mm_andnot_ps(vadjmask, vadjrndx));  in xnn_math_f32_roundu__sse2_cvt()

D | roundu-sse-addsub.c
  59  const __m128 vadjmask = _mm_and_ps(_mm_cmplt_ps(vrndx, vx), vnonsign_mask);  in xnn_math_f32_roundu__sse_addsub() local
  71  const __m128 vy = _mm_or_ps(_mm_and_ps(vadjrndx, vadjmask), _mm_andnot_ps(vadjmask, vrndx));  in xnn_math_f32_roundu__sse_addsub()

D | roundu-neon-cvt.c
  52  const uint32x4_t vadjmask = vorrq_u32(vcgeq_f32(vrndx, vx), vsign_mask);  in xnn_math_f32_roundu__neon_cvt() local
  65  const float32x4_t vy = vbslq_f32(vadjmask, vrndx, vadjrndx);  in xnn_math_f32_roundu__neon_cvt()

D | roundu-wasmsimd-cvt.c
  53  const v128_t vadjmask = wasm_v128_or(wasm_f32x4_ge(vrndx, vx), vsign_mask);  in xnn_math_f32_roundu__wasmsimd_cvt() local
  66  const v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);  in xnn_math_f32_roundu__wasmsimd_cvt()

D | roundu-wasmsimd-addsub.c
  59  const v128_t vadjmask = wasm_v128_or(wasm_f32x4_ge(vrndx, vx), vsign_mask);  in xnn_math_f32_roundu__wasmsimd_addsub() local
  72  const v128_t vy = wasm_v128_bitselect(vrndx, vadjrndx, vadjmask);  in xnn_math_f32_roundu__wasmsimd_addsub()

D | roundu-neon-addsub.c
  59  const uint32x4_t vadjmask = vbicq_u32(vcltq_f32(vrndx, vx), vsign_mask);  in xnn_math_f32_roundu__neon_addsub() local
  71  const float32x4_t vy = vbslq_f32(vadjmask, vadjrndx, vrndx);  in xnn_math_f32_roundu__neon_addsub()

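The reference implementations in src/math/ combine the two halves seen in the listings above: vrndx is produced from vx (by integer conversion in the *-cvt files, and apparently by the add/sub trick in the *-addsub files), and then either vrndx or an adjusted value is selected with a mask whose sign bit is forced. A scalar sketch of the round-up logic, written as an illustration rather than as XNNPACK code:

#include <math.h>
#include <stdint.h>

// Scalar model of the vectorized round-up (ceiling) shown above.
static float scalar_roundu(float x) {
  if (!(fabsf(x) < 0x1.0p+23f)) {
    return x;  // already integral, infinite, or NaN: pass through unchanged
  }
  float rndx = (float) (int32_t) x;  // truncate toward zero
  if (rndx < x) {
    rndx += 1.0f;  // adjust up where truncation lowered the value
  }
  // Mirrors the forced sign bit in the SIMD masks: the result keeps the input's sign,
  // so e.g. scalar_roundu(-0.5f) returns -0.0f.
  return copysignf(rndx, x);
}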