Cross-references for vtCDEF:

/external/XNNPACK/src/f32-velu/gen/
velu-wasmsimd-x86-rr2-p6-x16.c | xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() | vtCDEF: local
    78  v128_t vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_hi), vzCDEF);
    87  vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_lo), vtCDEF);
    96  vtCDEF = wasm_v128_andnot(vtCDEF, vsatmCDEF);
    97  v128_t vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtCDEF), vc5);
   102  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc4);
   107  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc3);
   112  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc2);
   117  vpCDEF = wasm_f32x4_mul(vpCDEF, vtCDEF);
   125  vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF);
   131  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vtCDEF);

velu-wasmsimd-arm-rr2-p6-x16.c | xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() | vtCDEF: local
    75  v128_t vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_hi), vzCDEF);
    80  vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_lo), vtCDEF);
    85  v128_t vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtCDEF), vc5);
    90  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc4);
    95  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc3);
   100  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc2);
   105  vpCDEF = wasm_f32x4_mul(vpCDEF, vtCDEF);
   113  vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF);
   119  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vtCDEF);

velu-sse41-rr2-p6-x16.c | xnn_f32_velu_ukernel__sse41_rr2_p6_x16() | vtCDEF: local
    75  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF);
    80  vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_lo), vtCDEF);
    85  __m128 vpCDEF = _mm_add_ps(_mm_mul_ps(vc6, vtCDEF), vc5);
    90  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc4);
    95  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc3);
   100  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc2);
   105  vpCDEF = _mm_mul_ps(vpCDEF, vtCDEF);
   113  vtCDEF = _mm_mul_ps(vtCDEF, vsCDEF);
   119  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vtCDEF);

velu-neon-rr2-p6-x16.c | xnn_f32_velu_ukernel__neon_rr2_p6_x16() | vtCDEF: local
    73  float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vminus_ln2_hi);
    78  vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo);
    83  float32x4_t vpCDEF = vmlaq_f32(vc5, vc6, vtCDEF);
    88  vpCDEF = vmlaq_f32(vc4, vpCDEF, vtCDEF);
    93  vpCDEF = vmlaq_f32(vc3, vpCDEF, vtCDEF);
    98  vpCDEF = vmlaq_f32(vc2, vpCDEF, vtCDEF);
   103  vpCDEF = vmulq_f32(vpCDEF, vtCDEF);
   111  vtCDEF = vmulq_f32(vtCDEF, vsCDEF);
   117  vpCDEF = vmlaq_f32(vtCDEF, vpCDEF, vtCDEF);

velu-wasmsimd-x86-rr2-p6-x20.c | xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() | vtCDEF: local
    83  v128_t vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_hi), vzCDEF);
    94  vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_lo), vtCDEF);
   105  vtCDEF = wasm_v128_andnot(vtCDEF, vsatmCDEF);
   106  v128_t vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtCDEF), vc5);
   113  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc4);
   119  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc3);
   125  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc2);
   131  vpCDEF = wasm_f32x4_mul(vpCDEF, vtCDEF);
   140  vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF);
   148  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vtCDEF);

velu-sse2-rr2-p6-x16.c | xnn_f32_velu_ukernel__sse2_rr2_p6_x16() | vtCDEF: local
    75  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF);
    80  vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_lo), vtCDEF);
    85  __m128 vpCDEF = _mm_add_ps(_mm_mul_ps(vc6, vtCDEF), vc5);
    90  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc4);
    95  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc3);
   100  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc2);
   105  vpCDEF = _mm_mul_ps(vpCDEF, vtCDEF);
   113  vtCDEF = _mm_mul_ps(vtCDEF, vsCDEF);
   119  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vtCDEF);

velu-wasmsimd-arm-rr2-p6-x20.c | xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() | vtCDEF: local
    80  v128_t vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_hi), vzCDEF);
    86  vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_lo), vtCDEF);
    92  v128_t vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtCDEF), vc5);
    98  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc4);
   104  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc3);
   110  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc2);
   116  vpCDEF = wasm_f32x4_mul(vpCDEF, vtCDEF);
   125  vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF);
   133  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vtCDEF);

velu-sse41-rr2-p6-x20.c | xnn_f32_velu_ukernel__sse41_rr2_p6_x20() | vtCDEF: local
    80  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF);
    86  vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_lo), vtCDEF);
    92  __m128 vpCDEF = _mm_add_ps(_mm_mul_ps(vc6, vtCDEF), vc5);
    98  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc4);
   104  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc3);
   110  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc2);
   116  vpCDEF = _mm_mul_ps(vpCDEF, vtCDEF);
   125  vtCDEF = _mm_mul_ps(vtCDEF, vsCDEF);
   133  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vtCDEF);

velu-neon-rr2-p6-x20.c | xnn_f32_velu_ukernel__neon_rr2_p6_x20() | vtCDEF: local
    78  float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vminus_ln2_hi);
    84  vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo);
    90  float32x4_t vpCDEF = vmlaq_f32(vc5, vc6, vtCDEF);
    96  vpCDEF = vmlaq_f32(vc4, vpCDEF, vtCDEF);
   102  vpCDEF = vmlaq_f32(vc3, vpCDEF, vtCDEF);
   108  vpCDEF = vmlaq_f32(vc2, vpCDEF, vtCDEF);
   114  vpCDEF = vmulq_f32(vpCDEF, vtCDEF);
   123  vtCDEF = vmulq_f32(vtCDEF, vsCDEF);
   131  vpCDEF = vmlaq_f32(vtCDEF, vpCDEF, vtCDEF);

velu-wasmsimd-x86-rr2-p6-x24.c | xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() | vtCDEF: local
    88  v128_t vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_hi), vzCDEF);
   101  vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_lo), vtCDEF);
   114  vtCDEF = wasm_v128_andnot(vtCDEF, vsatmCDEF);
   115  v128_t vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtCDEF), vc5);
   124  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc4);
   131  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc3);
   138  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vc2);
   145  vpCDEF = wasm_f32x4_mul(vpCDEF, vtCDEF);
   155  vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF);
   165  vpCDEF = wasm_f32x4_add(wasm_f32x4_mul(vpCDEF, vtCDEF), vtCDEF);

velu-neonfma-rr1-p6-x16.c | xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() | vtCDEF: local
    72  float32x4_t vtCDEF = vfmaq_f32(vzCDEF, vnCDEF, vminus_ln2);
    77  float32x4_t vpCDEF = vfmaq_f32(vc5, vc6, vtCDEF);
    82  vpCDEF = vfmaq_f32(vc4, vpCDEF, vtCDEF);
    87  vpCDEF = vfmaq_f32(vc3, vpCDEF, vtCDEF);
    92  vpCDEF = vfmaq_f32(vc2, vpCDEF, vtCDEF);
    97  vpCDEF = vmulq_f32(vpCDEF, vtCDEF);
   105  vtCDEF = vmulq_f32(vtCDEF, vsCDEF);
   111  vpCDEF = vfmaq_f32(vtCDEF, vpCDEF, vtCDEF);

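Every velu entry above is the same generated computation, specialized per ISA: vtCDEF holds the reduced argument t for lanes C..F of the vector tile. The rr2 in the file names is the two-step Cody-Waite reduction t = z - n*ln2_hi - n*ln2_lo (the first two matches in each file), p6 is the degree-6 Horner polynomial that follows, and the final two matches rescale by s = 2^n to reconstruct exp(z) - 1. The neonfma rr1 variant gets away with a single vminus_ln2 constant because fused multiply-add commits n*ln2 without an intermediate rounding, and the wasm_v128_andnot(vtCDEF, vsatmCDEF) matches in the x86 builds zero t on lanes that have already saturated. Below is a minimal scalar sketch of this dataflow; the constants are illustrative stand-ins (a classic Cephes-style ln2 split and raw Taylor coefficients), not XNNPACK's actual minimax values.

#include <math.h>

/* Scalar model of the rr2-p6 path; z is the (pre-scaled, non-positive)
 * ELU input and alpha the ELU slope. Constants below are stand-ins. */
static float elu_rr2_p6(float z, float alpha) {
  const float log2e        = 0x1.715476p+0f;   /* log2(e), correctly rounded */
  const float minus_ln2_hi = -0.693359375f;    /* hi half of -ln(2), exact in float */
  const float minus_ln2_lo = 2.12194440e-4f;   /* lo correction so hi+lo = -ln(2) */

  const float n = rintf(z * log2e);            /* exponent of the 2^n scale */
  const float s = ldexpf(1.0f, (int)n);        /* s = 2^n; kernels build this from exponent bits */

  float t = n * minus_ln2_hi + z;              /* rr2 step 1 (first match in each file) */
  t = n * minus_ln2_lo + t;                    /* rr2 step 2 */

  float p = (1.0f/720.0f) * t + (1.0f/120.0f); /* p6 Horner: c6*t + c5 */
  p = p * t + (1.0f/24.0f);                    /* + c4 */
  p = p * t + (1.0f/6.0f);                     /* + c3 */
  p = p * t + 0.5f;                            /* + c2 */
  p = p * t;                                   /* p = c2*t + ... + c6*t^5 */

  t *= s;                                      /* final two matches: rescale, then */
  p = p * t + t;                               /* p = s*t*(1 + p) */
  return alpha * (p + (s - 1.0f));             /* exp(z) - 1 = (s - 1) + s*t*(1 + p) */
}

Calling this with z = x for negative x reproduces ELU's negative branch; the generated kernels additionally handle the positive (identity) branch and the saturation cutoff with vector masks.
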
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
neon-p5-x16.c | xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() | vtCDEF: local
    86  float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi);
    91  vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo);
    97  float32x4_t vpCDEF = vmlaq_f32(vc4, vc5, vtCDEF);
   102  vpCDEF = vmlaq_f32(vc3, vpCDEF, vtCDEF);
   107  vpCDEF = vmlaq_f32(vc2, vpCDEF, vtCDEF);
   112  vpCDEF = vmlaq_f32(vc1, vpCDEF, vtCDEF);
   121  vtCDEF = vmulq_f32(vtCDEF, vsCDEF);
   126  float32x4_t vfCDEF = vmlaq_f32(vsCDEF, vpCDEF, vtCDEF);

wasmsimd-p5-x16.c | xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16() | vtCDEF: local
    82  v128_t vtCDEF = wasm_f32x4_add(vxCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_hi));
    87  vtCDEF = wasm_f32x4_add(vtCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_lo));
    93  v128_t vpCDEF = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vtCDEF));
    98  vpCDEF = wasm_f32x4_add(vc3, wasm_f32x4_mul(vpCDEF, vtCDEF));
   103  vpCDEF = wasm_f32x4_add(vc2, wasm_f32x4_mul(vpCDEF, vtCDEF));
   108  vpCDEF = wasm_f32x4_add(vc1, wasm_f32x4_mul(vpCDEF, vtCDEF));
   117  vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF);
   122  v128_t vfCDEF = wasm_f32x4_add(vsCDEF, wasm_f32x4_mul(vtCDEF, vpCDEF));

neonfma-p5-x16.c | xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16() | vtCDEF: local
    85  float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi);
    90  vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo);
    96  float32x4_t vpCDEF = vfmaq_f32(vc4, vc5, vtCDEF);
   101  vpCDEF = vfmaq_f32(vc3, vpCDEF, vtCDEF);
   106  vpCDEF = vfmaq_f32(vc2, vpCDEF, vtCDEF);
   111  vpCDEF = vfmaq_f32(vc1, vpCDEF, vtCDEF);
   120  vtCDEF = vmulq_f32(vtCDEF, vsCDEF);
   125  float32x4_t vfCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF);

sse2-p5-x16-acc4.c | xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() | vtCDEF: local
    85  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF);
    90  vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_lo), vtCDEF);
    96  __m128 vpCDEF = _mm_add_ps(_mm_mul_ps(vc5, vtCDEF), vc4);
   101  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc3);
   106  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc2);
   111  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc1);
   120  vtCDEF = _mm_mul_ps(vtCDEF, vsCDEF);
   125  __m128 vfCDEF = _mm_add_ps(_mm_mul_ps(vtCDEF, vpCDEF), vsCDEF);

neonfma-p5-x16-acc2.c | xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2() | vtCDEF: local
    86  float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi);
    91  vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo);
    97  float32x4_t vpCDEF = vfmaq_f32(vc4, vc5, vtCDEF);
   102  vpCDEF = vfmaq_f32(vc3, vpCDEF, vtCDEF);
   107  vpCDEF = vfmaq_f32(vc2, vpCDEF, vtCDEF);
   112  vpCDEF = vfmaq_f32(vc1, vpCDEF, vtCDEF);
   121  vtCDEF = vmulq_f32(vtCDEF, vsCDEF);
   126  float32x4_t vfCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF);

neon-p5-x16-acc2.c | xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() | vtCDEF: local
    87  float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi);
    92  vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo);
    98  float32x4_t vpCDEF = vmlaq_f32(vc4, vc5, vtCDEF);
   103  vpCDEF = vmlaq_f32(vc3, vpCDEF, vtCDEF);
   108  vpCDEF = vmlaq_f32(vc2, vpCDEF, vtCDEF);
   113  vpCDEF = vmlaq_f32(vc1, vpCDEF, vtCDEF);
   122  vtCDEF = vmulq_f32(vtCDEF, vsCDEF);
   127  float32x4_t vfCDEF = vmlaq_f32(vsCDEF, vpCDEF, vtCDEF);

wasmsimd-p5-x16-acc2.c | xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc2() | vtCDEF: local
    83  v128_t vtCDEF = wasm_f32x4_add(vxCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_hi));
    88  vtCDEF = wasm_f32x4_add(vtCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_lo));
    94  v128_t vpCDEF = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vtCDEF));
    99  vpCDEF = wasm_f32x4_add(vc3, wasm_f32x4_mul(vpCDEF, vtCDEF));
   104  vpCDEF = wasm_f32x4_add(vc2, wasm_f32x4_mul(vpCDEF, vtCDEF));
   109  vpCDEF = wasm_f32x4_add(vc1, wasm_f32x4_mul(vpCDEF, vtCDEF));
   118  vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF);
   123  v128_t vfCDEF = wasm_f32x4_add(vsCDEF, wasm_f32x4_mul(vtCDEF, vpCDEF));

sse2-p5-x16-acc2.c | xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() | vtCDEF: local
    83  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF);
    88  vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_lo), vtCDEF);
    94  __m128 vpCDEF = _mm_add_ps(_mm_mul_ps(vc5, vtCDEF), vc4);
    99  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc3);
   104  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc2);
   109  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc1);
   118  vtCDEF = _mm_mul_ps(vtCDEF, vsCDEF);
   123  __m128 vfCDEF = _mm_add_ps(_mm_mul_ps(vtCDEF, vpCDEF), vsCDEF);

wasmsimd-p5-x16-acc4.c | xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc4() | vtCDEF: local
    85  v128_t vtCDEF = wasm_f32x4_add(vxCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_hi));
    90  vtCDEF = wasm_f32x4_add(vtCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_lo));
    96  v128_t vpCDEF = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vtCDEF));
   101  vpCDEF = wasm_f32x4_add(vc3, wasm_f32x4_mul(vpCDEF, vtCDEF));
   106  vpCDEF = wasm_f32x4_add(vc2, wasm_f32x4_mul(vpCDEF, vtCDEF));
   111  vpCDEF = wasm_f32x4_add(vc1, wasm_f32x4_mul(vpCDEF, vtCDEF));
   120  vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF);
   125  v128_t vfCDEF = wasm_f32x4_add(vsCDEF, wasm_f32x4_mul(vtCDEF, vpCDEF));

neonfma-p5-x16-acc4.c | xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4() | vtCDEF: local
    88  float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi);
    93  vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo);
    99  float32x4_t vpCDEF = vfmaq_f32(vc4, vc5, vtCDEF);
   104  vpCDEF = vfmaq_f32(vc3, vpCDEF, vtCDEF);
   109  vpCDEF = vfmaq_f32(vc2, vpCDEF, vtCDEF);
   114  vpCDEF = vfmaq_f32(vc1, vpCDEF, vtCDEF);
   123  vtCDEF = vmulq_f32(vtCDEF, vsCDEF);
   128  float32x4_t vfCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF);

neon-p5-x16-acc4.c | xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() | vtCDEF: local
    89  float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi);
    94  vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo);
   100  float32x4_t vpCDEF = vmlaq_f32(vc4, vc5, vtCDEF);
   105  vpCDEF = vmlaq_f32(vc3, vpCDEF, vtCDEF);
   110  vpCDEF = vmlaq_f32(vc2, vpCDEF, vtCDEF);
   115  vpCDEF = vmlaq_f32(vc1, vpCDEF, vtCDEF);
   124  vtCDEF = vmulq_f32(vtCDEF, vsCDEF);
   129  float32x4_t vfCDEF = vmlaq_f32(vsCDEF, vpCDEF, vtCDEF);

sse2-p5-x16.c | xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() | vtCDEF: local
    82  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF);
    87  vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_lo), vtCDEF);
    93  __m128 vpCDEF = _mm_add_ps(_mm_mul_ps(vc5, vtCDEF), vc4);
    98  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc3);
   103  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc2);
   108  vpCDEF = _mm_add_ps(_mm_mul_ps(vpCDEF, vtCDEF), vc1);
   117  vtCDEF = _mm_mul_ps(vtCDEF, vsCDEF);
   122  __m128 vfCDEF = _mm_add_ps(_mm_mul_ps(vtCDEF, vpCDEF), vsCDEF);

neonfma-p5-x20.c | xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() | vtCDEF: local
    90  float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi);
    96  vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo);
   103  float32x4_t vpCDEF = vfmaq_f32(vc4, vc5, vtCDEF);
   109  vpCDEF = vfmaq_f32(vc3, vpCDEF, vtCDEF);
   115  vpCDEF = vfmaq_f32(vc2, vpCDEF, vtCDEF);
   121  vpCDEF = vfmaq_f32(vc1, vpCDEF, vtCDEF);
   131  vtCDEF = vmulq_f32(vtCDEF, vsCDEF);
   137  float32x4_t vfCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF);

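The raddstoreexpminusmax matches are the same reduce-and-evaluate shape one degree lower: a p5 polynomial whose linear coefficient vc1 stays explicit, applied to vxCDEF, which already holds the max-subtracted input. The vfCDEF of the last match in each file is what the surrounding (unmatched) code adds into one, two, or four running sums (the plain, -acc2, and -acc4 variants) before storing. A scalar model under the same stand-in constants as the ELU sketch above:

#include <math.h>

/* Scalar model of the p5 evaluation; x is already v[i] - max, so x <= 0.
 * Taylor coefficients stand in for the kernels' minimax values. */
static float exp_p5(float x) {
  const float log2e        = 0x1.715476p+0f;
  const float minus_ln2_hi = -0.693359375f;
  const float minus_ln2_lo = 2.12194440e-4f;

  const float n = rintf(x * log2e);
  const float s = ldexpf(1.0f, (int)n);        /* s = 2^n */

  float t = n * minus_ln2_hi + x;              /* two-step reduction, as above */
  t = n * minus_ln2_lo + t;

  float p = (1.0f/120.0f) * t + (1.0f/24.0f);  /* c5*t + c4 */
  p = p * t + (1.0f/6.0f);                     /* + c3 */
  p = p * t + 0.5f;                            /* + c2 */
  p = p * t + 1.0f;                            /* + c1 */

  t *= s;                                      /* the last two matches: */
  return p * t + s;                            /* f = s + (s*t)*p = s*exp(t) */
}
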
/external/XNNPACK/src/f32-sigmoid/gen/
wasmsimd-p5-div-x16.c | xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() | vtCDEF: local
    68  v128_t vtCDEF = wasm_f32x4_add(vzCDEF, wasm_f32x4_mul(vnCDEF, vln2_hi));
    73  vtCDEF = wasm_f32x4_add(vtCDEF, wasm_f32x4_mul(vnCDEF, vln2_lo));
    78  v128_t vpCDEF = wasm_f32x4_add(vc4, wasm_f32x4_mul(vtCDEF, vc5));
    83  vpCDEF = wasm_f32x4_add(vc3, wasm_f32x4_mul(vtCDEF, vpCDEF));
    88  vpCDEF = wasm_f32x4_add(vc2, wasm_f32x4_mul(vtCDEF, vpCDEF));
    93  vpCDEF = wasm_f32x4_add(vc1, wasm_f32x4_mul(vtCDEF, vpCDEF));
    98  vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF);
   103  const v128_t veCDEF = wasm_f32x4_add(vsCDEF, wasm_f32x4_mul(vtCDEF, vpCDEF));

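The lone sigmoid match runs the identical p5 machinery (with positive vln2_hi/vln2_lo constants, absorbed into the sign conventions of the generated code) and then reconstructs with a division instead of a sum, which is what the p5_div name records: with e approximating exp(-|x|), sigmoid(x) = e/(1 + e) for x <= 0 and 1 - e/(1 + e) otherwise. A sketch reusing exp_p5 from the previous model; the real kernel performs the final sign selection with SIMD masks rather than a branch:

#include <math.h>

/* Scalar model of the p5-div sigmoid, built on exp_p5 above. */
static float sigmoid_p5_div(float x) {
  const float e = exp_p5(-fabsf(x));   /* e = exp(-|x|), in (0, 1] */
  const float f = e / (e + 1.0f);      /* sigmoid(-|x|) */
  return x > 0.0f ? 1.0f - f : f;      /* mirror for positive inputs */
}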