Home
last modified time | relevance | path

Searched refs:vxCDEF (Results 1 – 25 of 163) sorted by relevance

1234567

/external/XNNPACK/src/f32-hswish/gen/
Dhswish-wasmsimd-x16.c36 v128_t vxCDEF = wasm_v128_load(x + 12); in xnn_f32_hswish_ukernel__wasmsimd_x16() local
45 v128_t vaccCDEF = wasm_f32x4_add(vxCDEF, vthree); in xnn_f32_hswish_ukernel__wasmsimd_x16()
46 vxCDEF = wasm_f32x4_mul(vxCDEF, vsixth); in xnn_f32_hswish_ukernel__wasmsimd_x16()
61 vaccCDEF = wasm_f32x4_mul(vaccCDEF, vxCDEF); in xnn_f32_hswish_ukernel__wasmsimd_x16()
Dhswish-neon-x16.c36 float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neon_x16() local
44 float32x4_t vaccCDEF = vaddq_f32(vxCDEF, vthree); in xnn_f32_hswish_ukernel__neon_x16()
45 vxCDEF = vmulq_f32(vxCDEF, vsixth); in xnn_f32_hswish_ukernel__neon_x16()
60 vaccCDEF = vmulq_f32(vaccCDEF, vxCDEF); in xnn_f32_hswish_ukernel__neon_x16()
/external/XNNPACK/src/f32-vsqrt/gen/
Dneonfma-nr1rsqrts1fma1adj-x16.c33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() local
38 float32x4_t vrsqrtxCDEF = vrsqrteq_f32(vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16()
48 const float32x4_t vcorrectionCDEF = vrsqrtsq_f32(vxCDEF, vrxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16()
61 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16()
81 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16()
Dneonfma-nr1rsqrts1fma1adj-x20.c33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() local
39 float32x4_t vrsqrtxCDEF = vrsqrteq_f32(vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20()
51 const float32x4_t vcorrectionCDEF = vrsqrtsq_f32(vxCDEF, vrxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20()
66 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20()
91 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20()
Dneonfma-nr2fma1adj-x16.c33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() local
38 const float32x4_t vrsqrtxCDEF = vrsqrteq_f32(vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16()
46 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16()
80 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16()
Dneonfma-nr1rsqrts1fma1adj-x24.c33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() local
40 float32x4_t vrsqrtxCDEF = vrsqrteq_f32(vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
54 const float32x4_t vcorrectionCDEF = vrsqrtsq_f32(vxCDEF, vrxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
71 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
101 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
Dneonfma-nr1rsqrts1fma1adj-x28.c33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() local
41 float32x4_t vrsqrtxCDEF = vrsqrteq_f32(vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
57 const float32x4_t vcorrectionCDEF = vrsqrtsq_f32(vxCDEF, vrxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
76 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
111 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
Dneonfma-nr2fma1adj-x20.c33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() local
39 const float32x4_t vrsqrtxCDEF = vrsqrteq_f32(vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20()
48 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20()
90 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20()
Dneonfma-nr1rsqrts1fma1adj-x32.c33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local
42 float32x4_t vrsqrtxCDEF = vrsqrteq_f32(vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
60 const float32x4_t vcorrectionCDEF = vrsqrtsq_f32(vxCDEF, vrxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
81 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
121 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
Dneonfma-nr2fma1adj-x24.c33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() local
40 const float32x4_t vrsqrtxCDEF = vrsqrteq_f32(vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
50 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
100 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
Dneonfma-nr1rsqrts1fma1adj-x36.c33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local
43 float32x4_t vrsqrtxCDEF = vrsqrteq_f32(vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
63 const float32x4_t vcorrectionCDEF = vrsqrtsq_f32(vxCDEF, vrxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
86 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
131 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
Dneonfma-nr2fma1adj-x28.c33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() local
41 const float32x4_t vrsqrtxCDEF = vrsqrteq_f32(vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
52 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
110 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
Dneonfma-nr1rsqrts1fma1adj-x40.c33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local
44 float32x4_t vrsqrtxCDEF = vrsqrteq_f32(vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
66 const float32x4_t vcorrectionCDEF = vrsqrtsq_f32(vxCDEF, vrxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
91 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
141 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
/external/XNNPACK/src/f32-velu/gen/
Dvelu-neonfma-rr1-p6-x16.c48 float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() local
53 const float32x4_t vzCDEF = vmaxq_f32(vmulq_f32(vxCDEF, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16()
124 const uint32x4_t vmCDEF = vcltq_f32(vxCDEF, vmovq_n_f32(0.0f)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16()
125 vxCDEF = vmulq_f32(vxCDEF, vbeta); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16()
130 const float32x4_t vyCDEF = vbslq_f32(vmCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16()
Dvelu-wasmsimd-arm-rr2-p6-x16.c49 v128_t vxCDEF = wasm_v128_load(x + 12); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() local
55 const v128_t vzCDEF = wasm_f32x4_max(wasm_f32x4_mul(vxCDEF, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
132 const v128_t vsignmCDEF = wasm_i32x4_shr(vxCDEF, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
133 vxCDEF = wasm_f32x4_mul(vxCDEF, vbeta); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
138 const v128_t vyCDEF = wasm_v128_bitselect(veCDEF, vxCDEF, vsignmCDEF); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
Dvelu-sse41-rr2-p6-x16.c49 __m128 vxCDEF = _mm_loadu_ps(x + 12); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() local
55 const __m128 vzCDEF = _mm_max_ps(vsat_cutoff, _mm_mul_ps(vxCDEF, vprescale)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()
129 vxCDEF = _mm_mul_ps(vxCDEF, vbeta); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()
134 const __m128 vyCDEF = _mm_blendv_ps(vxCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()
Dvelu-neon-rr2-p6-x16.c49 float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_velu_ukernel__neon_rr2_p6_x16() local
54 const float32x4_t vzCDEF = vmaxq_f32(vmulq_f32(vxCDEF, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__neon_rr2_p6_x16()
130 const uint32x4_t vmCDEF = vcltq_f32(vxCDEF, vmovq_n_f32(0.0f)); in xnn_f32_velu_ukernel__neon_rr2_p6_x16()
131 vxCDEF = vmulq_f32(vxCDEF, vbeta); in xnn_f32_velu_ukernel__neon_rr2_p6_x16()
136 const float32x4_t vyCDEF = vbslq_f32(vmCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__neon_rr2_p6_x16()
Dvelu-neonfma-rr1-p6-x20.c48 float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() local
54 const float32x4_t vzCDEF = vmaxq_f32(vmulq_f32(vxCDEF, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
139 const uint32x4_t vmCDEF = vcltq_f32(vxCDEF, vmovq_n_f32(0.0f)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
140 vxCDEF = vmulq_f32(vxCDEF, vbeta); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
147 const float32x4_t vyCDEF = vbslq_f32(vmCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
Dvelu-sse2-rr2-p6-x16.c49 __m128 vxCDEF = _mm_loadu_ps(x + 12); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() local
55 const __m128 vzCDEF = _mm_max_ps(vsat_cutoff, _mm_mul_ps(vxCDEF, vprescale)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16()
132 const __m128 vmCDEF = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vxCDEF))); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16()
133 vxCDEF = _mm_mul_ps(vxCDEF, vbeta); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16()
138 const __m128 vyCDEF = _mm_or_ps(_mm_and_ps(veCDEF, vmCDEF), _mm_andnot_ps(vmCDEF, vxCDEF)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16()
Dvelu-wasmsimd-arm-rr2-p6-x20.c49 v128_t vxCDEF = wasm_v128_load(x + 12); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() local
56 const v128_t vzCDEF = wasm_f32x4_max(wasm_f32x4_mul(vxCDEF, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
148 const v128_t vsignmCDEF = wasm_i32x4_shr(vxCDEF, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
149 vxCDEF = wasm_f32x4_mul(vxCDEF, vbeta); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
156 const v128_t vyCDEF = wasm_v128_bitselect(veCDEF, vxCDEF, vsignmCDEF); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
Dvelu-sse41-rr2-p6-x20.c49 __m128 vxCDEF = _mm_loadu_ps(x + 12); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() local
56 const __m128 vzCDEF = _mm_max_ps(vsat_cutoff, _mm_mul_ps(vxCDEF, vprescale)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
145 vxCDEF = _mm_mul_ps(vxCDEF, vbeta); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
151 const __m128 vyCDEF = _mm_blendv_ps(vxCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
Dvelu-neon-rr2-p6-x20.c49 float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_velu_ukernel__neon_rr2_p6_x20() local
55 const float32x4_t vzCDEF = vmaxq_f32(vmulq_f32(vxCDEF, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
146 const uint32x4_t vmCDEF = vcltq_f32(vxCDEF, vmovq_n_f32(0.0f)); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
147 vxCDEF = vmulq_f32(vxCDEF, vbeta); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
154 const float32x4_t vyCDEF = vbslq_f32(vmCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
Dvelu-wasmsimd-x86-rr2-p6-x16.c49 v128_t vxCDEF = wasm_v128_load(x + 12); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() local
55 const v128_t vzCDEF = wasm_f32x4_mul(vxCDEF, vprescale); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()
144 const v128_t vsignmCDEF = wasm_i32x4_shr(vxCDEF, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()
145 vxCDEF = wasm_f32x4_mul(vxCDEF, vbeta); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()
150 const v128_t vyCDEF = wasm_v128_bitselect(veCDEF, vxCDEF, vsignmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()
Dvelu-neon-rr2-p6-x24.c49 float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_velu_ukernel__neon_rr2_p6_x24() local
56 const float32x4_t vzCDEF = vmaxq_f32(vmulq_f32(vxCDEF, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
162 const uint32x4_t vmCDEF = vcltq_f32(vxCDEF, vmovq_n_f32(0.0f)); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
163 vxCDEF = vmulq_f32(vxCDEF, vbeta); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
172 const float32x4_t vyCDEF = vbslq_f32(vmCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
Dvelu-wasmsimd-arm-rr2-lut16-p3-x16.c49 v128_t vxCDEF = wasm_v128_load(x + 12); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16() local
55 const v128_t vzCDEF = wasm_f32x4_max(wasm_f32x4_mul(vxCDEF, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16()
154 const v128_t vsignmCDEF = wasm_i32x4_shr(vxCDEF, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16()
155 vxCDEF = wasm_f32x4_mul(vxCDEF, vbeta); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16()
160 const v128_t vyCDEF = wasm_v128_bitselect(veCDEF, vxCDEF, vsignmCDEF); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16()

1234567