/external/XNNPACK/src/f32-vrnd/ |
D | vrndz-neon.c.in |
    36  uint32x4_t vrndmask${ABC[N:N+4]} = vcaltq_f32(vx${ABC[N:N+4]}, vintegral_threshold);
    42  vrndmask${ABC[N:N+4]} = vbicq_u32(vrndmask${ABC[N:N+4]}, vmovq_n_u32(UINT32_C(0x80000000)));
    45  const float32x4_t vy${ABC[N:N+4]} = vbslq_f32(vrndmask${ABC[N:N+4]}, vrndx${ABC[N:N+4]}, vx${ABC[N:N+4]});
    54  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    56  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    57  const float32x4_t vy = vbslq_f32(vrndmask, vrndx, vx);
    63  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    65  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    66  const float32x4_t vy = vbslq_f32(vrndmask, vrndx, vx);
|
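The matches above show only the mask arithmetic. A minimal self-contained sketch of the whole round-toward-zero lane select follows, assuming vintegral_threshold is 0x1.0p+23f (2^23, past which every finite float is already integral) and that vrndx is the usual int32 round-trip; the function name is hypothetical and this illustrates the technique rather than reproducing the kernel verbatim.

#include <stdint.h>
#include <arm_neon.h>

// Sketch of the vrndz lane select, assuming vintegral_threshold = 0x1.0p+23f.
static float32x4_t vrndz_sketch(float32x4_t vx) {
  const float32x4_t vintegral_threshold = vmovq_n_f32(0x1.0p+23f);
  // Truncate by round-tripping through int32 (vcvtq_s32_f32 rounds toward zero).
  const float32x4_t vrndx = vcvtq_f32_s32(vcvtq_s32_f32(vx));
  // All-ones in lanes with |x| < 2^23, where the int32 round-trip is valid.
  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
  // Clear bit 31 of the mask so vbslq always takes the sign bit from vx,
  // preserving the sign of zero (e.g. -0.5f truncates to -0.0f, not +0.0f).
  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
  // Masked bits come from vrndx; out-of-range and NaN lanes pass x through.
  return vbslq_f32(vrndmask, vrndx, vx);
}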
D | vrndne-neon.c.in |
    34  uint32x4_t vrndmask${ABC[N:N+4]} = vcaltq_f32(vmagic_number, vx${ABC[N:N+4]});
    40  vrndmask${ABC[N:N+4]} = vorrq_u32(vrndmask${ABC[N:N+4]}, vmovq_n_u32(UINT32_C(0x80000000)));
    46  const float32x4_t vy${ABC[N:N+4]} = vbslq_f32(vrndmask${ABC[N:N+4]}, vx${ABC[N:N+4]}, vrndabsx${ABC[N:N+4]});
    55  uint32x4_t vrndmask = vcaltq_f32(vmagic_number, vx);
    57  vrndmask = vorrq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    59  const float32x4_t vy = vbslq_f32(vrndmask, vx, vrndabsx);
    65  uint32x4_t vrndmask = vcaltq_f32(vmagic_number, vx);
    67  vrndmask = vorrq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    69  const float32x4_t vy = vbslq_f32(vrndmask, vx, vrndabsx);
|
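A hedged sketch of the round-to-nearest-even variant, assuming vmagic_number is 0x1.0p+23f and vrndabsx is the usual magic add-sub of the absolute value; it relies on the default FPU rounding mode and must not be built with -ffast-math, and the function name is again hypothetical.

#include <stdint.h>
#include <arm_neon.h>

// Sketch of vrndne, assuming vmagic_number = 0x1.0p+23f.
static float32x4_t vrndne_sketch(float32x4_t vx) {
  const float32x4_t vmagic_number = vmovq_n_f32(0x1.0p+23f);
  const float32x4_t vabsx = vabsq_f32(vx);
  // Adding then subtracting 2^23 rounds |x| to an integer in the default
  // (nearest-even) rounding mode.
  const float32x4_t vrndabsx = vsubq_f32(vaddq_f32(vabsx, vmagic_number), vmagic_number);
  // vcaltq_f32(a, b) tests |a| < |b|, so this flags lanes with 2^23 < |x|,
  // which are already integral and must pass through unchanged.
  uint32x4_t vrndmask = vcaltq_f32(vmagic_number, vx);
  // Force bit 31 on so the select always copies the sign bit from vx.
  vrndmask = vorrq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
  return vbslq_f32(vrndmask, vx, vrndabsx);
}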
D | vrndd-neon.c.in |
    37  uint32x4_t vrndmask${ABC[N:N+4]} = vcaltq_f32(vx${ABC[N:N+4]}, vintegral_threshold);
    43  vrndmask${ABC[N:N+4]} = vbicq_u32(vrndmask${ABC[N:N+4]}, vmovq_n_u32(UINT32_C(0x80000000)));
    46  const float32x4_t vrndx${ABC[N:N+4]} = vbslq_f32(vrndmask${ABC[N:N+4]}, vprerndx${ABC[N:N+4]}, vx${ABC[N:N+4]});
    64  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    66  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    67  const float32x4_t vrndx = vbslq_f32(vrndmask, vprerndx, vx);
    76  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    78  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    79  const float32x4_t vrndx = vbslq_f32(vrndmask, vprerndx, vx);
|
D | vrndu-neon.c.in |
    37  uint32x4_t vrndmask${ABC[N:N+4]} = vcaltq_f32(vx${ABC[N:N+4]}, vintegral_threshold);
    43  vrndmask${ABC[N:N+4]} = vbicq_u32(vrndmask${ABC[N:N+4]}, vmovq_n_u32(UINT32_C(0x80000000)));
    46  const float32x4_t vrndx${ABC[N:N+4]} = vbslq_f32(vrndmask${ABC[N:N+4]}, vprerndx${ABC[N:N+4]}, vx${ABC[N:N+4]});
    67  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    69  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    70  const float32x4_t vrndx = vbslq_f32(vrndmask, vprerndx, vx);
    81  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    82  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    83  const float32x4_t vrndx = vbslq_f32(vrndmask, vprerndx, vx);
|
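The two entries above (floor and ceiling) only show the same truncate-and-select step as vrndz; the matched lines stop before the direction-specific fixup. The sketch below reconstructs that fixup from the definition of floor rather than copying it from the kernel, and assumes vprerndx is the int32 round-trip of vx and vintegral_threshold is 0x1.0p+23f.

#include <stdint.h>
#include <arm_neon.h>

// Sketch of vrndd (floor); the ceiling kernel would mirror the final
// adjustment with an add where vrndx < vx.
static float32x4_t vrndd_sketch(float32x4_t vx) {
  const float32x4_t vintegral_threshold = vmovq_n_f32(0x1.0p+23f);
  const float32x4_t vprerndx = vcvtq_f32_s32(vcvtq_s32_f32(vx));  // trunc
  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
  const float32x4_t vrndx = vbslq_f32(vrndmask, vprerndx, vx);
  // Truncation rounds toward zero, so negative non-integral lanes land one
  // above floor(x); subtract 1 exactly where the truncated value overshoots.
  const uint32x4_t vadjmask = vcgtq_f32(vrndx, vx);
  const float32x4_t vadjrndx = vsubq_f32(vrndx, vmovq_n_f32(1.0f));
  return vbslq_f32(vadjmask, vadjrndx, vrndx);
}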
D | vrndne-sse2.c.in |
    38  const __m128 vrndmask${ABC[N:N+4]} = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx${ABC[N:N+4]}, vmagic)));
    44  const __m128 vy${ABC[N:N+4]} = _mm_or_ps(_mm_and_ps(vx${ABC[N:N+4]}, vrndmask${ABC[N:N+4]}), _mm_andnot_ps(vrndmask${ABC[N:N+4]}, vrndx${ABC[N:N+4]}));
    57  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    59  const __m128 vy = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vrndx));
    67  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    69  __m128 vy = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vrndx));
|
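The same mask construction recurs verbatim in the vrndz, vrndd, and vrndu SSE2 entries below. A hedged sketch of the nearest-even variant, assuming vmagic is _mm_set1_epi32(INT32_MIN) and vintx comes from _mm_cvtps_epi32 under the default MXCSR rounding mode (the truncating variants presumably substitute _mm_cvttps_epi32); the function name is hypothetical.

#include <stdint.h>
#include <emmintrin.h>

// Sketch of the SSE2 select: _mm_cvtps_epi32 returns INT32_MIN (0x80000000)
// for lanes that overflow int32 or are NaN, so comparing vintx against vmagic
// flags exactly the lanes that must pass through unchanged.
static __m128 vrndne_sse2_sketch(__m128 vx) {
  const __m128i vmagic = _mm_set1_epi32(INT32_MIN);
  const __m128i vintx = _mm_cvtps_epi32(vx);  // nearest-even under default MXCSR
  const __m128 vrndx = _mm_cvtepi32_ps(vintx);
  // OR-ing vmagic back in sets bit 31 of every lane of the mask, so the sign
  // bit of the result always comes from vx (signed zeros are preserved).
  const __m128 vrndmask =
      _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
  // Bits set in vrndmask come from vx, the rest from the rounded value.
  return _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vrndx));
}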
D | vrndz-sse2.c.in |
    38  const __m128 vrndmask${ABC[N:N+4]} = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx${ABC[N:N+4]}, vmagic)));
    44  const __m128 vy${ABC[N:N+4]} = _mm_or_ps(_mm_and_ps(vx${ABC[N:N+4]}, vrndmask${ABC[N:N+4]}), _mm_andnot_ps(vrndmask${ABC[N:N+4]}, vrndx${ABC[N:N+4]}));
    57  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    59  const __m128 vy = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vrndx));
    67  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    69  __m128 vy = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vrndx));
|
D | vrndd-sse2.c.in |
    39  const __m128 vrndmask${ABC[N:N+4]} = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx${ABC[N:N+4]}, vmagic)));
    45  const __m128 vrndx${ABC[N:N+4]} = _mm_or_ps(_mm_and_ps(vx${ABC[N:N+4]}, vrndmask${ABC[N:N+4]}), _mm_andnot_ps(vrndmask${ABC[N:N+4]}, vprerndx${ABC[N:N+4]}));
    61  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    63  const __m128 vrndx = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vprerndx));
    72  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    74  const __m128 vrndx = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vprerndx));
|
D | vrndu-sse2.c.in |
    39  const __m128 vrndmask${ABC[N:N+4]} = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx${ABC[N:N+4]}, vmagic)));
    45  const __m128 vrndx${ABC[N:N+4]} = _mm_or_ps(_mm_and_ps(vx${ABC[N:N+4]}, vrndmask${ABC[N:N+4]}), _mm_andnot_ps(vrndmask${ABC[N:N+4]}, vprerndx${ABC[N:N+4]}));
    67  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    69  const __m128 vrndx = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vprerndx));
    80  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    82  const __m128 vrndx = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vprerndx));
|
D | vrndz-wasmsimd-cvt.c.in |
    42  const v128_t vrndmask${ABC[N:N+4]} = wasm_v128_andnot(wasm_f32x4_lt(vabsx${ABC[N:N+4]}, vmagic_number), vsign_mask);
    45  const v128_t vy${ABC[N:N+4]} = wasm_v128_bitselect(vrndx${ABC[N:N+4]}, vx${ABC[N:N+4]}, vrndmask${ABC[N:N+4]});
    59  const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask);
    60  const v128_t vy = wasm_v128_bitselect(vrndx, vx, vrndmask);
    71  const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask);
    72  v128_t vy = wasm_v128_bitselect(vrndx, vx, vrndmask);
|
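A hedged sketch of the WAsm SIMD convert-based truncation above, assuming vsign_mask is splat(-0.0f), vmagic_number is splat(0x1.0p+23f), and vrndx is produced by the saturating float-to-int conversion; the function name is hypothetical. Note that wasm_v128_andnot(a, b) computes a & ~b, the opposite operand order from SSE.

#include <wasm_simd128.h>

// Sketch of the wasmsimd-cvt truncation.
static v128_t vrndz_wasm_sketch(v128_t vx) {
  const v128_t vsign_mask = wasm_f32x4_splat(-0.0f);
  const v128_t vmagic_number = wasm_f32x4_splat(0x1.0p+23f);
  const v128_t vabsx = wasm_v128_andnot(vx, vsign_mask);
  // Truncate via the saturating float->int conversion, then convert back.
  const v128_t vrndx = wasm_f32x4_convert_i32x4(wasm_i32x4_trunc_sat_f32x4(vx));
  // (|x| < 2^23) with bit 31 cleared: bitselect then takes the sign bit from
  // vx, preserving signed zeros, and passes big/NaN lanes through unchanged.
  const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask);
  // Bits set in vrndmask come from vrndx, the rest from vx.
  return wasm_v128_bitselect(vrndx, vx, vrndmask);
}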
/external/XNNPACK/src/f32-vrnd/gen/ |
D | vrndz-neon-x8.c | xnn_f32_vrndz_ukernel__neon_x8()
    54  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    56  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    57  const float32x4_t vy = vbslq_f32(vrndmask, vrndx, vx);
    63  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    65  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    66  const float32x4_t vy = vbslq_f32(vrndmask, vrndx, vx);
|
D | vrndne-neon-x8.c | xnn_f32_vrndne_ukernel__neon_x8()
    56  uint32x4_t vrndmask = vcaltq_f32(vmagic_number, vx);
    58  vrndmask = vorrq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    60  const float32x4_t vy = vbslq_f32(vrndmask, vx, vrndabsx);
    66  uint32x4_t vrndmask = vcaltq_f32(vmagic_number, vx);
    68  vrndmask = vorrq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    70  const float32x4_t vy = vbslq_f32(vrndmask, vx, vrndabsx);
|
D | vrndd-neon-x8.c | xnn_f32_vrndd_ukernel__neon_x8()
    64  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    66  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    67  const float32x4_t vrndx = vbslq_f32(vrndmask, vprerndx, vx);
    76  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    78  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    79  const float32x4_t vrndx = vbslq_f32(vrndmask, vprerndx, vx);
|
D | vrndu-neon-x8.c | xnn_f32_vrndu_ukernel__neon_x8()
    67  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    69  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    70  const float32x4_t vrndx = vbslq_f32(vrndmask, vprerndx, vx);
    81  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    82  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    83  const float32x4_t vrndx = vbslq_f32(vrndmask, vprerndx, vx);
|
D | vrndz-sse2-x8.c | xnn_f32_vrndz_ukernel__sse2_x8()
    55  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    57  const __m128 vy = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vrndx));
    65  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    67  __m128 vy = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vrndx));
|
D | vrndne-sse2-x8.c | xnn_f32_vrndne_ukernel__sse2_x8()
    55  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    57  const __m128 vy = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vrndx));
    65  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    67  __m128 vy = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vrndx));
|
D | vrndd-sse2-x8.c | xnn_f32_vrndd_ukernel__sse2_x8()
    59  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    61  const __m128 vrndx = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vprerndx));
    70  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    72  const __m128 vrndx = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vprerndx));
|
D | vrndu-sse2-x8.c | xnn_f32_vrndu_ukernel__sse2_x8()
    65  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    67  const __m128 vrndx = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vprerndx));
    78  const __m128 vrndmask = _mm_castsi128_ps(_mm_or_si128(vmagic, _mm_cmpeq_epi32(vintx, vmagic)));
    80  const __m128 vrndx = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vprerndx));
|
D | vrndz-neon-x4.c | xnn_f32_vrndz_ukernel__neon_x4()
    47  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    49  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    50  const float32x4_t vy = vbslq_f32(vrndmask, vrndx, vx);
|
D | vrndne-wasmsimd-addsub-x4.c | xnn_f32_vrndne_ukernel__wasmsimd_addsub_x4()
    35  const v128_t vrndmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vmagic_number, vabsx));
    37  const v128_t vy = wasm_v128_bitselect(vx, vrndabsx, vrndmask);
    46  const v128_t vrndmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vmagic_number, vabsx));
    48  v128_t vy = wasm_v128_bitselect(vx, vrndabsx, vrndmask);
|
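A hedged sketch of the add-sub variant above, assuming vsign_mask is splat(-0.0f), vmagic_number is splat(0x1.0p+23f), and vrndabsx is the magic add-sub of |x|; like the NEON version it depends on the default rounding mode, and the function name is hypothetical.

#include <wasm_simd128.h>

// Sketch of the wasmsimd-addsub round-to-nearest-even.
static v128_t vrndne_wasm_sketch(v128_t vx) {
  const v128_t vsign_mask = wasm_f32x4_splat(-0.0f);
  const v128_t vmagic_number = wasm_f32x4_splat(0x1.0p+23f);
  const v128_t vabsx = wasm_v128_andnot(vx, vsign_mask);
  // Mask = sign bit | (2^23 <= |x|): already-integral lanes pass through,
  // and the sign bit of every lane comes from vx.
  const v128_t vrndmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vmagic_number, vabsx));
  // Adding then subtracting 2^23 rounds |x| in the default nearest-even mode.
  const v128_t vrndabsx = wasm_f32x4_sub(wasm_f32x4_add(vabsx, vmagic_number), vmagic_number);
  // Bits set in vrndmask come from vx, the rest from the rounded magnitude.
  return wasm_v128_bitselect(vx, vrndabsx, vrndmask);
}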
D | vrndz-wasmsimd-cvt-x4.c | xnn_f32_vrndz_ukernel__wasmsimd_cvt_x4()
    37  const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask);
    38  const v128_t vy = wasm_v128_bitselect(vrndx, vx, vrndmask);
    49  const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask);
    50  v128_t vy = wasm_v128_bitselect(vrndx, vx, vrndmask);
|
D | vrndne-neon-x4.c | xnn_f32_vrndne_ukernel__neon_x4()
    48  uint32x4_t vrndmask = vcaltq_f32(vmagic_number, vx);
    50  vrndmask = vorrq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    52  const float32x4_t vy = vbslq_f32(vrndmask, vx, vrndabsx);
|
D | vrndd-wasmsimd-addsub-x4.c | xnn_f32_vrndd_ukernel__wasmsimd_addsub_x4()
    36  const v128_t vrndmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vmagic_number, vabsx));
    38  const v128_t vrndx = wasm_v128_bitselect(vx, vrndabsx, vrndmask);
    48  const v128_t vrndmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vmagic_number, vabsx));
    50  const v128_t vrndx = wasm_v128_bitselect(vx, vrndabsx, vrndmask);
|
D | vrndd-wasmsimd-cvt-x4.c | xnn_f32_vrndd_ukernel__wasmsimd_cvt_x4()
    38  const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask);
    39  const v128_t vrndx = wasm_v128_bitselect(vprerndx, vx, vrndmask);
    52  const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask);
    53  const v128_t vrndx = wasm_v128_bitselect(vprerndx, vx, vrndmask);
|
D | vrndz-wasmsimd-addsub-x4.c | xnn_f32_vrndz_ukernel__wasmsimd_addsub_x4()
    36  const v128_t vrndmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vmagic_number, vabsx));
    40  const v128_t vy = wasm_v128_bitselect(vx, vflrabsx, vrndmask);
    49  const v128_t vrndmask = wasm_v128_or(vsign_mask, wasm_f32x4_le(vmagic_number, vabsx));
    53  v128_t vy = wasm_v128_bitselect(vx, vflrabsx, vrndmask);
|
D | vrndd-neon-x4.c | xnn_f32_vrndd_ukernel__neon_x4()
    54  uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
    56  vrndmask = vbicq_u32(vrndmask, vmovq_n_u32(UINT32_C(0x80000000)));
    57  const float32x4_t vrndx = vbslq_f32(vrndmask, vprerndx, vx);
|