Home
last modified time | relevance | path

Searched refs:vtGHIJ (Results 1 – 25 of 85) sorted by relevance

1234

/external/XNNPACK/src/f32-velu/gen/
Dvelu-wasmsimd-x86-rr2-p6-x20.c84 v128_t vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() local
95 vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
106 vtGHIJ = wasm_v128_andnot(vtGHIJ, vsatmGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
107 v128_t vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
113 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
119 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
125 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
131 vpGHIJ = wasm_f32x4_mul(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
141 vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
148 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
Dvelu-sse41-rr2-p6-x20.c80 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() local
86 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
92 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
98 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
104 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
110 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
116 vpGHIJ = _mm_mul_ps(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
126 vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
133 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
Dvelu-neon-rr2-p6-x20.c78 float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() local
84 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
90 float32x4_t vpGHIJ = vmlaq_f32(vc5, vc6, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
96 vpGHIJ = vmlaq_f32(vc4, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
102 vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
108 vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
114 vpGHIJ = vmulq_f32(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
124 vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
131 vpGHIJ = vmlaq_f32(vtGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
Dvelu-wasmsimd-arm-rr2-p6-x20.c80 v128_t vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() local
86 vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
92 v128_t vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
98 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
104 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
110 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
116 vpGHIJ = wasm_f32x4_mul(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
126 vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
133 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
Dvelu-wasmsimd-x86-rr2-p6-x24.c89 v128_t vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() local
102 vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
115 vtGHIJ = wasm_v128_andnot(vtGHIJ, vsatmGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
116 v128_t vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
124 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
131 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
138 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
145 vpGHIJ = wasm_f32x4_mul(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
156 vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
165 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
Dvelu-neon-rr2-p6-x24.c83 float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() local
90 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
97 float32x4_t vpGHIJ = vmlaq_f32(vc5, vc6, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
104 vpGHIJ = vmlaq_f32(vc4, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
111 vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
118 vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
125 vpGHIJ = vmulq_f32(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
136 vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
145 vpGHIJ = vmlaq_f32(vtGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
Dvelu-sse2-rr2-p6-x20.c80 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() local
86 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20()
92 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20()
98 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20()
104 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20()
110 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20()
116 vpGHIJ = _mm_mul_ps(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20()
126 vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20()
133 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20()
Dvelu-sse41-rr2-p6-x24.c85 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() local
92 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
99 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
106 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
113 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
120 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
127 vpGHIJ = _mm_mul_ps(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
138 vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
147 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
Dvelu-wasmsimd-arm-rr2-p6-x24.c85 v128_t vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() local
92 vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
99 v128_t vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
106 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
113 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
120 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
127 vpGHIJ = wasm_f32x4_mul(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
138 vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
147 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
Dvelu-sse2-rr2-p6-x24.c85 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() local
92 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
99 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
106 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
113 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
120 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
127 vpGHIJ = _mm_mul_ps(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
138 vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
147 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
Dvelu-neonfma-rr1-p6-x20.c77 float32x4_t vtGHIJ = vfmaq_f32(vzGHIJ, vnGHIJ, vminus_ln2); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() local
83 float32x4_t vpGHIJ = vfmaq_f32(vc5, vc6, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
89 vpGHIJ = vfmaq_f32(vc4, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
95 vpGHIJ = vfmaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
101 vpGHIJ = vfmaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
107 vpGHIJ = vmulq_f32(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
117 vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
124 vpGHIJ = vfmaq_f32(vtGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dneon-rr2-p5-x20.c76 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20() local
82 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20()
88 float32x4_t vpGHIJ = vmlaq_f32(vc4, vc5, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20()
94 vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20()
100 vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20()
106 vpGHIJ = vmlaq_f32(vc1, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20()
112 vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20()
118 float32x4_t vfGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20()
Dwasmsimd-rr2-p5-x20-acc2.c86 v128_t vtGHIJ = wasm_f32x4_add(vxGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2() local
92 vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2()
99 v128_t vpGHIJ = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2()
105 vpGHIJ = wasm_f32x4_add(vc3, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2()
111 vpGHIJ = wasm_f32x4_add(vc2, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2()
117 vpGHIJ = wasm_f32x4_add(vc1, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2()
127 vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2()
133 v128_t vfGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2()
Dsse2-rr2-p5-x20-acc2.c86 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2() local
92 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2()
99 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2()
105 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2()
111 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2()
117 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2()
127 vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2()
133 __m128 vfGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2()
Dwasmsimd-rr2-p5-x20-acc5.c89 v128_t vtGHIJ = wasm_f32x4_add(vxGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5() local
95 vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5()
102 v128_t vpGHIJ = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5()
108 vpGHIJ = wasm_f32x4_add(vc3, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5()
114 vpGHIJ = wasm_f32x4_add(vc2, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5()
120 vpGHIJ = wasm_f32x4_add(vc1, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5()
130 vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5()
136 v128_t vfGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5()
Dsse2-rr2-p5-x20-acc5.c89 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5() local
95 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5()
102 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5()
108 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5()
114 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5()
120 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5()
130 vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5()
136 __m128 vfGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5()
Dneon-rr2-p5-x20-acc5.c80 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5() local
86 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5()
92 float32x4_t vpGHIJ = vmlaq_f32(vc4, vc5, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5()
98 vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5()
104 vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5()
110 vpGHIJ = vmlaq_f32(vc1, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5()
116 vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5()
122 float32x4_t vfGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5()
Dsse2-rr2-p5-x20.c85 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20() local
91 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20()
98 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20()
104 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20()
110 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20()
116 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20()
126 vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20()
132 __m128 vfGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20()
Dneon-rr2-p5-x20-acc2.c77 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2() local
83 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2()
89 float32x4_t vpGHIJ = vmlaq_f32(vc4, vc5, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2()
95 vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2()
101 vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2()
107 vpGHIJ = vmlaq_f32(vc1, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2()
113 vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2()
119 float32x4_t vfGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2()
Dwasmsimd-rr2-p5-x20.c85 v128_t vtGHIJ = wasm_f32x4_add(vxGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20() local
91 vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20()
98 v128_t vpGHIJ = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20()
104 vpGHIJ = wasm_f32x4_add(vc3, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20()
110 vpGHIJ = wasm_f32x4_add(vc2, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20()
116 vpGHIJ = wasm_f32x4_add(vc1, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20()
126 vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20()
132 v128_t vfGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20()
/external/XNNPACK/src/f32-vsigmoid/gen/
Dvsigmoid-wasmsimd-rr2-p5-div-x20.c74 v128_t vtGHIJ = wasm_f32x4_add(vzGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_hi)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20() local
80 vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_lo)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20()
86 v128_t vpGHIJ = wasm_f32x4_add(vc4, wasm_f32x4_mul(vtGHIJ, vc5)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20()
92 vpGHIJ = wasm_f32x4_add(vc3, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20()
98 vpGHIJ = wasm_f32x4_add(vc2, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20()
104 vpGHIJ = wasm_f32x4_add(vc1, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20()
110 vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20()
116 const v128_t veGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20()
Dvsigmoid-sse41-rr2-p5-div-x20.c74 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20() local
80 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20()
86 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20()
92 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20()
98 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20()
104 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20()
110 vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20()
116 __m128 veGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20()
Dvsigmoid-sse2-rr2-p5-div-x20.c74 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20() local
80 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20()
86 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20()
92 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20()
98 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20()
104 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20()
110 vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20()
116 __m128 veGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20()
Dvsigmoid-sse41-rr2-p5-div-x24.c79 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24() local
86 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24()
93 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24()
100 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24()
107 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24()
114 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24()
121 vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24()
128 __m128 veGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24()
Dvsigmoid-neon-rr2-p5-nr2recps-x20.c73 float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vln2_hi); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20() local
79 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vln2_lo); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
85 float32x4_t vpGHIJ = vmlaq_f32(vc4, vc5, vtGHIJ); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
91 vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
97 vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
103 vpGHIJ = vmlaq_f32(vc1, vpGHIJ, vtGHIJ); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
109 vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
115 const float32x4_t veGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20()

1234