/external/XNNPACK/src/f32-vlrelu/gen/ |
D | vlrelu-wasmsimd-minmax-x8.c | in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8():
      31  v128_t vx4567 = wasm_v128_load(x + 4);   (local)
      36  v128_t vacc4567 = wasm_i32x4_max(vx4567, vzero);
      37  vx4567 = wasm_i32x4_min(vx4567, vzero);
      40  vacc4567 = wasm_f32x4_add(vacc4567, wasm_f32x4_mul(vx4567, vslope));
|
D | vlrelu-sse-x8.c | in xnn_f32_vlrelu_ukernel__sse_x8():
      31  __m128 vx4567 = _mm_loadu_ps(x + 4);   (local)
      36  __m128 vacc4567 = _mm_max_ps(_mm_setzero_ps(), vx4567);
      37  vx4567 = _mm_min_ps(vx4567, vzero);
      40  vacc4567 = _mm_add_ps(vacc4567, _mm_mul_ps(vx4567, vslope));
|
D | vlrelu-sse41-x8.c | in xnn_f32_vlrelu_ukernel__sse41_x8():
      30  const __m128 vx4567 = _mm_loadu_ps(x + 4);   (local)
      34  __m128 vacc4567 = _mm_mul_ps(vx4567, vslope);
      37  vacc4567 = _mm_blendv_ps(vx4567, vacc4567, vx4567);
|
D | vlrelu-wasmsimd-bitselect-x8.c | in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8():
      31  const v128_t vx4567 = wasm_v128_load(x + 4);   (local)
      36  v128_t vacc4567 = wasm_f32x4_mul(vx4567, vslope);
      37  const v128_t vmask4567 = wasm_i32x4_lt(vx4567, vzero);
      40  vacc4567 = wasm_v128_bitselect(vacc4567, vx4567, vmask4567);
|
D | vlrelu-neon-x8.c | in xnn_f32_vlrelu_ukernel__neon_x8():
      31  const float32x4_t vx4567 = vld1q_f32(x); x += 4;   (local)
      35  float32x4_t vacc4567 = vmulq_f32(vx4567, vslope);
      36  const uint32x4_t vmask4567 = vcltq_s32(vreinterpretq_s32_f32(vx4567), vmovq_n_s32(0));
      39  vacc4567 = vbslq_f32(vmask4567, vacc4567, vx4567);
|
D | vlrelu-sse2-x8.c | in xnn_f32_vlrelu_ukernel__sse2_x8():
      30  const __m128 vx4567 = _mm_loadu_ps(x + 4);   (local)
      35  __m128 vacc4567 = _mm_mul_ps(vx4567, vslope);
      36  …_m128 vmask4567 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx4567)));
      39  vacc4567 = _mm_or_ps(_mm_and_ps(vacc4567, vmask4567), _mm_andnot_ps(vmask4567, vx4567));
|
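All of the vlrelu kernels above compute LeakyReLU, y = x for x >= 0 and y = x * slope otherwise; the variants differ only in how each ISA selects between the two branches (an integer min/max split, _mm_blendv_ps, wasm_v128_bitselect, or a sign-compare mask). A minimal scalar sketch of the min/max decomposition used by the wasmsimd-minmax and sse variants; the function name is illustrative, not part of XNNPACK:

    #include <stddef.h>

    // Scalar sketch: max(x, 0) keeps the positive part, min(x, 0) * slope scales
    // the negative part, and their sum is the LeakyReLU output.
    static void f32_vlrelu_scalar(size_t n, const float* x, float* y, float slope) {
      for (size_t i = 0; i < n; i++) {
        const float vx = x[i];
        const float pos = vx > 0.0f ? vx : 0.0f;  // mirrors wasm_i32x4_max(vx, vzero)
        const float neg = vx < 0.0f ? vx : 0.0f;  // mirrors wasm_i32x4_min(vx, vzero)
        y[i] = pos + neg * slope;
      }
    }
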
/external/XNNPACK/src/f32-hswish/gen/ |
D | hswish-wasmsimd-x8.c | in xnn_f32_hswish_ukernel__wasmsimd_x8():
      34  v128_t vx4567 = wasm_v128_load(x + 4);   (local)
      39  v128_t vacc4567 = wasm_f32x4_add(vx4567, vthree);
      40  vx4567 = wasm_f32x4_mul(vx4567, vsixth);
      49  vacc4567 = wasm_f32x4_mul(vacc4567, vx4567);
|
D | hswish-neon-x8.c | in xnn_f32_hswish_ukernel__neon_x8():
      34  float32x4_t vx4567 = vld1q_f32(x); x += 4;   (local)
      38  float32x4_t vacc4567 = vaddq_f32(vx4567, vthree);
      39  vx4567 = vmulq_f32(vx4567, vsixth);
      48  vacc4567 = vmulq_f32(vacc4567, vx4567);
|
D | hswish-wasmsimd-x16.c | in xnn_f32_hswish_ukernel__wasmsimd_x16():
      34  v128_t vx4567 = wasm_v128_load(x + 4);   (local)
      41  v128_t vacc4567 = wasm_f32x4_add(vx4567, vthree);
      42  vx4567 = wasm_f32x4_mul(vx4567, vsixth);
      59  vacc4567 = wasm_f32x4_mul(vacc4567, vx4567);
|
D | hswish-neon-x16.c | in xnn_f32_hswish_ukernel__neon_x16():
      34  float32x4_t vx4567 = vld1q_f32(x); x += 4;   (local)
      40  float32x4_t vacc4567 = vaddq_f32(vx4567, vthree);
      41  vx4567 = vmulq_f32(vx4567, vsixth);
      58  vacc4567 = vmulq_f32(vacc4567, vx4567);
|
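The hswish kernels compute HardSwish, y = x * min(max(x + 3, 0), 6) / 6. The hits above show only the x + 3 term, the x * (1/6) term (vsixth), and the final multiply; the intermediate clamping falls on lines the search did not match. A minimal scalar sketch under those assumptions, with an illustrative function name:

    #include <stddef.h>

    // Scalar sketch of HardSwish: clamp (x + 3) to [0, 6], then multiply by x / 6.
    static void f32_hswish_scalar(size_t n, const float* x, float* y) {
      for (size_t i = 0; i < n; i++) {
        float vacc = x[i] + 3.0f;               // vaddq_f32(vx, vthree)
        const float vx = x[i] * (1.0f / 6.0f);  // vmulq_f32(vx, vsixth)
        vacc = vacc < 0.0f ? 0.0f : vacc;       // lower clamp (not shown in the hits)
        vacc = vacc > 6.0f ? 6.0f : vacc;       // upper clamp (not shown in the hits)
        y[i] = vacc * vx;                       // vmulq_f32(vacc, vx)
      }
    }
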
/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr1rsqrts1fma1adj-x8.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8():
      31  const float32x4_t vx4567 = vld1q_f32(x); x += 4;   (local)
      34  float32x4_t vrsqrtx4567 = vrsqrteq_f32(vx4567);
      40  const float32x4_t vcorrection4567 = vrsqrtsq_f32(vx4567, vrx4567);
      47  float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567);
      59  const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567);
|
D | neonfma-nr1rsqrts1fma1adj-x12.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12():
      31  const float32x4_t vx4567 = vld1q_f32(x); x += 4;   (local)
      35  float32x4_t vrsqrtx4567 = vrsqrteq_f32(vx4567);
      43  const float32x4_t vcorrection4567 = vrsqrtsq_f32(vx4567, vrx4567);
      52  float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567);
      69  const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567);
|
D | neonfma-nr2fma1adj-x8.c | in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8():
      31  const float32x4_t vx4567 = vld1q_f32(x); x += 4;   (local)
      34  const float32x4_t vrsqrtx4567 = vrsqrteq_f32(vx4567);
      38  float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567);
      58  const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567);
|
D | neonfma-nr1rsqrts1fma1adj-x16.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16():
      31  const float32x4_t vx4567 = vld1q_f32(x); x += 4;   (local)
      36  float32x4_t vrsqrtx4567 = vrsqrteq_f32(vx4567);
      46  const float32x4_t vcorrection4567 = vrsqrtsq_f32(vx4567, vrx4567);
      57  float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567);
      79  const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567);
|
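These vsqrt kernels form sqrt(x) as x * rsqrt(x): a hardware reciprocal-square-root estimate (vrsqrteq_f32) is refined by Newton-Raphson steps (vrsqrtsq_f32 or FMA), multiplied by x, and corrected once more using the residual x - s*s that vfmsq_f32(vx, vsqrtx, vsqrtx) computes. A rough scalar model of one refinement plus the final adjustment; sqrtf stands in for the low-precision hardware estimate, and the function name is illustrative:

    #include <math.h>

    // Scalar model of the rsqrt-based square root: estimate r ~ 1/sqrt(x),
    // refine it, form s = r * x, then add 0.5 * r * (x - s*s) as the adjustment.
    static float f32_sqrt_nr_scalar(float x) {
      float r = 1.0f / sqrtf(x);          // stands in for the vrsqrteq_f32 estimate
      r = r * (1.5f - 0.5f * x * r * r);  // one Newton-Raphson step (vrsqrtsq_f32 analogue)
      float s = r * x;                    // sqrt estimate, as in vmulq_f32(vrsqrtx, vx)
      const float residual = x - s * s;   // vfmsq_f32(vx, vsqrtx, vsqrtx)
      return s + 0.5f * r * residual;     // final adjustment step
    }
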
/external/XNNPACK/src/f32-vrnd/gen/ |
D | vrndd-wasmsimd-cvt-x8.c | in xnn_f32_vrndd_ukernel__wasmsimd_cvt_x8():
      33  const v128_t vx4567 = wasm_v128_load(x + 4);   (local)
      38  const v128_t vintx4567 = wasm_i32x4_trunc_saturate_f32x4(vx4567);
      39  const v128_t vabsx4567 = wasm_f32x4_abs(vx4567);
      47  const v128_t vrndx4567 = wasm_v128_bitselect(vprerndx4567, vx4567, vrndmask4567);
      50  const v128_t vadj4567 = wasm_v128_and(wasm_f32x4_lt(vx4567, vrndx4567), vone);
|
D | vrndu-wasmsimd-cvt-x8.c | in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x8():
      33  const v128_t vx4567 = wasm_v128_load(x + 4);   (local)
      38  const v128_t vintx4567 = wasm_i32x4_trunc_saturate_f32x4(vx4567);
      39  const v128_t vabsx4567 = wasm_f32x4_abs(vx4567);
      47  const v128_t vrndx4567 = wasm_v128_bitselect(vprerndx4567, vx4567, vrndmask4567);
      50  const v128_t vadjmask4567 = wasm_v128_or(vsign_mask, wasm_f32x4_le(vx4567, vrndx4567));
|
D | vrndd-neon-x8.c | in xnn_f32_vrndd_ukernel__neon_x8():
      32  const float32x4_t vx4567 = vld1q_f32(x); x += 4;   (local)
      35  const int32x4_t vintx4567 = vcvtq_s32_f32(vx4567);
      38  uint32x4_t vrndmask4567 = vcaltq_f32(vx4567, vintegral_threshold);
      47  const float32x4_t vrndx4567 = vbslq_f32(vrndmask4567, vprerndx4567, vx4567);
      50  const uint32x4_t vadjmask4567 = vcgtq_f32(vrndx4567, vx4567);
|
D | vrndz-wasmsimd-cvt-x8.c | in xnn_f32_vrndz_ukernel__wasmsimd_cvt_x8():
      32  const v128_t vx4567 = wasm_v128_load(x + 4);   (local)
      37  const v128_t vintx4567 = wasm_i32x4_trunc_saturate_f32x4(vx4567);
      38  const v128_t vabsx4567 = wasm_f32x4_abs(vx4567);
      46  const v128_t vy4567 = wasm_v128_bitselect(vrndx4567, vx4567, vrndmask4567);
|
D | vrndz-neon-x8.c | in xnn_f32_vrndz_ukernel__neon_x8():
      31  const float32x4_t vx4567 = vld1q_f32(x); x += 4;   (local)
      34  const int32x4_t vintx4567 = vcvtq_s32_f32(vx4567);
      37  uint32x4_t vrndmask4567 = vcaltq_f32(vx4567, vintegral_threshold);
      46  const float32x4_t vy4567 = vbslq_f32(vrndmask4567, vrndx4567, vx4567);
|
D | vrndu-neon-x8.c | in xnn_f32_vrndu_ukernel__neon_x8():
      32  const float32x4_t vx4567 = vld1q_f32(x); x += 4;   (local)
      35  const int32x4_t vintx4567 = vcvtq_s32_f32(vx4567);
      38  uint32x4_t vrndmask4567 = vcaltq_f32(vx4567, vintegral_threshold);
      47  const float32x4_t vrndx4567 = vbslq_f32(vrndmask4567, vprerndx4567, vx4567);
      50  uint32x4_t vadjmask4567 = vcgeq_f32(vrndx4567, vx4567);
|
D | vrndne-neon-x8.c | in xnn_f32_vrndne_ukernel__neon_x8():
      31  const float32x4_t vx4567 = vld1q_f32(x); x += 4;   (local)
      35  const float32x4_t vabsx4567 = vabsq_f32(vx4567);
      36  uint32x4_t vrndmask4567 = vcaltq_f32(vmagic_number, vx4567);
      48  const float32x4_t vy4567 = vbslq_f32(vrndmask4567, vx4567, vrndabsx4567);
|
D | vrndd-sse2-x8.c | in xnn_f32_vrndd_ukernel__sse2_x8():
      32  const __m128 vx4567 = _mm_loadu_ps(x + 4);   (local)
      36  const __m128i vintx4567 = _mm_cvttps_epi32(vx4567);
      45  …const __m128 vrndx4567 = _mm_or_ps(_mm_and_ps(vx4567, vrndmask4567), _mm_andnot_ps(vrndmask4567, v…
      48  const __m128 vy4567 = _mm_sub_ps(vrndx4567, _mm_and_ps(_mm_cmpgt_ps(vrndx4567, vx4567), vone));
|
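The vrndd/vrndu/vrndz/vrndne kernels above share one pattern: convert to int32 where the magnitude is below an integral threshold, convert back, and then adjust the truncated value to get the requested rounding direction (down, up, toward zero, or to nearest even). A minimal scalar sketch of the round-down (vrndd) case; the 2^23 threshold and the function name are assumptions for illustration:

    #include <math.h>
    #include <stdint.h>

    // Scalar sketch of truncate-and-adjust round-down (floor). Values with
    // |x| >= 2^23 are already integral in binary32 and pass through unchanged.
    static float f32_rndd_scalar(float x) {
      const float rndx = fabsf(x) < 0x1.0p+23f
          ? (float) (int32_t) x   // truncate toward zero, like _mm_cvttps_epi32 / vcvtq_s32_f32
          : x;                    // already integral: keep x
      return rndx - (rndx > x ? 1.0f : 0.0f);  // step down when truncation rounded up
    }
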
/external/XNNPACK/src/f32-vunary/gen/ |
D | vsqr-sse-x8.c | in xnn_f32_vsqr_ukernel__sse_x8():
      32  const __m128 vx4567 = _mm_loadu_ps(x + 4);   (local)
      36  const __m128 vy4567 = _mm_mul_ps(vx4567, vx4567);
|
D | vsqr-wasmsimd-x8.c | in xnn_f32_vsqr_ukernel__wasmsimd_x8():
      32  const v128_t vx4567 = wasm_v128_load(x + 4);   (local)
      36  const v128_t vy4567 = wasm_f32x4_mul(vx4567, vx4567);
|
D | vsqr-neon-x8.c | in xnn_f32_vsqr_ukernel__neon_x8():
      31  const float32x4_t vx4567 = vld1q_f32(x); x += 4;   (local)
      34  const float32x4_t vy4567 = vmulq_f32(vx4567, vx4567);
|
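The vsqr kernels square each element; a scalar equivalent for reference, with an illustrative function name:

    #include <stddef.h>

    // Scalar square: y[i] = x[i] * x[i].
    static void f32_vsqr_scalar(size_t n, const float* x, float* y) {
      for (size_t i = 0; i < n; i++) {
        y[i] = x[i] * x[i];
      }
    }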