Home
last modified time | relevance | path

Searched refs:vxKLMN (Results 1 – 25 of 51) sorted by relevance

123

/external/XNNPACK/src/f32-vsqrt/gen/
Dneonfma-nr1rsqrts1fma1adj-x24.c35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() local
42 float32x4_t vrsqrtxKLMN = vrsqrteq_f32(vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
56 const float32x4_t vcorrectionKLMN = vrsqrtsq_f32(vxKLMN, vrxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
75 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
103 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
Dneonfma-nr1rsqrts1fma1adj-x28.c35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() local
43 float32x4_t vrsqrtxKLMN = vrsqrteq_f32(vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
59 const float32x4_t vcorrectionKLMN = vrsqrtsq_f32(vxKLMN, vrxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
80 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
113 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
Dneonfma-nr1rsqrts1fma1adj-x32.c35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local
44 float32x4_t vrsqrtxKLMN = vrsqrteq_f32(vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
62 const float32x4_t vcorrectionKLMN = vrsqrtsq_f32(vxKLMN, vrxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
85 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
123 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
Dneonfma-nr2fma1adj-x24.c35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() local
42 const float32x4_t vrsqrtxKLMN = vrsqrteq_f32(vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
54 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
102 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
Dneonfma-nr1rsqrts1fma1adj-x36.c35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local
45 float32x4_t vrsqrtxKLMN = vrsqrteq_f32(vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
65 const float32x4_t vcorrectionKLMN = vrsqrtsq_f32(vxKLMN, vrxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
90 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
133 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
Dneonfma-nr2fma1adj-x28.c35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() local
43 const float32x4_t vrsqrtxKLMN = vrsqrteq_f32(vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
56 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
112 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
Dneonfma-nr1rsqrts1fma1adj-x40.c35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local
46 float32x4_t vrsqrtxKLMN = vrsqrteq_f32(vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
68 const float32x4_t vcorrectionKLMN = vrsqrtsq_f32(vxKLMN, vrxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
95 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
143 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
Dneonfma-nr2fma1adj-x32.c35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() local
44 const float32x4_t vrsqrtxKLMN = vrsqrteq_f32(vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
58 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
122 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
Dneonfma-nr2fma1adj-x36.c35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() local
45 const float32x4_t vrsqrtxKLMN = vrsqrteq_f32(vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
60 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
132 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
Dneonfma-nr2fma1adj-x40.c35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local
46 const float32x4_t vrsqrtxKLMN = vrsqrteq_f32(vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
62 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
142 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
/external/XNNPACK/src/f32-velu/gen/
Dvelu-neon-rr2-p6-x24.c51 float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_velu_ukernel__neon_rr2_p6_x24() local
58 const float32x4_t vzKLMN = vmaxq_f32(vmulq_f32(vxKLMN, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
166 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
167 vxKLMN = vmulq_f32(vxKLMN, vbeta); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
174 const float32x4_t vyKLMN = vbslq_f32(vmKLMN, veKLMN, vxKLMN); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
Dvelu-neonfma-rr1-p6-x24.c50 float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() local
57 const float32x4_t vzKLMN = vmaxq_f32(vmulq_f32(vxKLMN, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24()
158 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24()
159 vxKLMN = vmulq_f32(vxKLMN, vbeta); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24()
166 const float32x4_t vyKLMN = vbslq_f32(vmKLMN, veKLMN, vxKLMN); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24()
Dvelu-sse41-rr2-p6-x24.c51 __m128 vxKLMN = _mm_loadu_ps(x + 20); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() local
59 const __m128 vzKLMN = _mm_max_ps(vsat_cutoff, _mm_mul_ps(vxKLMN, vprescale)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
163 vxKLMN = _mm_mul_ps(vxKLMN, vbeta); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
170 const __m128 vyKLMN = _mm_blendv_ps(vxKLMN, veKLMN, vxKLMN); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
Dvelu-wasmsimd-arm-rr2-p6-x24.c51 v128_t vxKLMN = wasm_v128_load(x + 20); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() local
59 const v128_t vzKLMN = wasm_f32x4_max(wasm_f32x4_mul(vxKLMN, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
168 const v128_t vsignmKLMN = wasm_i32x4_shr(vxKLMN, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
169 vxKLMN = wasm_f32x4_mul(vxKLMN, vbeta); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
176 const v128_t vyKLMN = wasm_v128_bitselect(veKLMN, vxKLMN, vsignmKLMN); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
Dvelu-wasmsimd-x86-rr2-p6-x24.c51 v128_t vxKLMN = wasm_v128_load(x + 20); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() local
59 const v128_t vzKLMN = wasm_f32x4_mul(vxKLMN, vprescale); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
186 const v128_t vsignmKLMN = wasm_i32x4_shr(vxKLMN, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
187 vxKLMN = wasm_f32x4_mul(vxKLMN, vbeta); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
194 const v128_t vyKLMN = wasm_v128_bitselect(veKLMN, vxKLMN, vsignmKLMN); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
Dvelu-sse2-rr2-p6-x24.c51 __m128 vxKLMN = _mm_loadu_ps(x + 20); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() local
59 const __m128 vzKLMN = _mm_max_ps(vsat_cutoff, _mm_mul_ps(vxKLMN, vprescale)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
168 const __m128 vmKLMN = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vxKLMN))); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
169 vxKLMN = _mm_mul_ps(vxKLMN, vbeta); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
176 const __m128 vyKLMN = _mm_or_ps(_mm_and_ps(veKLMN, vmKLMN), _mm_andnot_ps(vmKLMN, vxKLMN)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
Dvelu-wasmsimd-arm-rr2-lut16-p3-x24.c51 v128_t vxKLMN = wasm_v128_load(x + 20); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24() local
59 const v128_t vzKLMN = wasm_f32x4_max(wasm_f32x4_mul(vxKLMN, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24()
202 const v128_t vsignmKLMN = wasm_i32x4_shr(vxKLMN, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24()
203 vxKLMN = wasm_f32x4_mul(vxKLMN, vbeta); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24()
210 const v128_t vyKLMN = wasm_v128_bitselect(veKLMN, vxKLMN, vsignmKLMN); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24()
Dvelu-neonfma-rr1-lut16-p3-x24.c50 float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() local
57 const float32x4_t vzKLMN = vmaxq_f32(vmulq_f32(vxKLMN, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24()
193 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24()
194 vxKLMN = vmulq_f32(vxKLMN, vbeta); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24()
201 const float32x4_t vyKLMN = vbslq_f32(vmKLMN, veKLMN, vxKLMN); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24()
Dvelu-wasmsimd-x86-rr2-lut16-p3-x24.c51 v128_t vxKLMN = wasm_v128_load(x + 20); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() local
59 const v128_t vzKLMN = wasm_f32x4_mul(vxKLMN, vprescale); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24()
220 const v128_t vsignmKLMN = wasm_i32x4_shr(vxKLMN, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24()
221 vxKLMN = wasm_f32x4_mul(vxKLMN, vbeta); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24()
228 const v128_t vyKLMN = wasm_v128_bitselect(veKLMN, vxKLMN, vsignmKLMN); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24()
Dvelu-neon-rr2-lut16-p3-x24.c51 float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24() local
58 const float32x4_t vzKLMN = vmaxq_f32(vmulq_f32(vxKLMN, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24()
201 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24()
202 vxKLMN = vmulq_f32(vxKLMN, vbeta); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24()
209 const float32x4_t vyKLMN = vbslq_f32(vmKLMN, veKLMN, vxKLMN); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24()
Dvelu-sse41-rr2-lut16-p3-x24.c51 __m128 vxKLMN = _mm_loadu_ps(x + 20); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() local
59 const __m128 vzKLMN = _mm_max_ps(vsat_cutoff, _mm_mul_ps(vxKLMN, vprescale)); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
254 vxKLMN = _mm_mul_ps(vxKLMN, vbeta); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
261 const __m128 vyKLMN = _mm_blendv_ps(vxKLMN, veKLMN, vxKLMN); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
/external/XNNPACK/src/f32-sigmoid/gen/
Dneonfma-rr1-p5-div-x24.c43 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() local
50 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24()
141 vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm_cutoff))); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24()
148 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24()
Dneonfma-rr1-p5-nr2recps-x24.c43 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() local
50 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24()
162 vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm_cutoff))); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24()
169 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24()
Dneonfma-rr1-p5-nr1recps1fma-x24.c43 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24() local
50 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24()
162 vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm_cutoff))); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24()
169 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24()
Dneonfma-rr1-p5-nr2fma-x24.c43 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24() local
50 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24()
162 vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm_cutoff))); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24()
169 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24()

123