/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasmsimd-x86-rr2-p6-x20.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20():
      84  v128_t vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi), vzGHIJ);  (local)
      95  vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo), vtGHIJ);
     106  vtGHIJ = wasm_v128_andnot(vtGHIJ, vsatmGHIJ);
     107  v128_t vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtGHIJ), vc5);
     113  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc4);
     119  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc3);
     125  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc2);
     131  vpGHIJ = wasm_f32x4_mul(vpGHIJ, vtGHIJ);
     141  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
     148  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vtGHIJ);
|
D | velu-sse41-rr2-p6-x20.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x20():
      80  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);  (local)
      86  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
      92  __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc6, vtGHIJ), vc5);
      98  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc4);
     104  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3);
     110  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2);
     116  vpGHIJ = _mm_mul_ps(vpGHIJ, vtGHIJ);
     126  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
     133  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vtGHIJ);
|
D | velu-neon-rr2-p6-x20.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x20():
      78  float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vminus_ln2_hi);  (local)
      84  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
      90  float32x4_t vpGHIJ = vmlaq_f32(vc5, vc6, vtGHIJ);
      96  vpGHIJ = vmlaq_f32(vc4, vpGHIJ, vtGHIJ);
     102  vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ);
     108  vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ);
     114  vpGHIJ = vmulq_f32(vpGHIJ, vtGHIJ);
     124  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
     131  vpGHIJ = vmlaq_f32(vtGHIJ, vpGHIJ, vtGHIJ);
|
D | velu-wasmsimd-arm-rr2-p6-x20.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20():
      80  v128_t vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi), vzGHIJ);  (local)
      86  vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo), vtGHIJ);
      92  v128_t vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtGHIJ), vc5);
      98  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc4);
     104  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc3);
     110  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc2);
     116  vpGHIJ = wasm_f32x4_mul(vpGHIJ, vtGHIJ);
     126  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
     133  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vtGHIJ);
|
D | velu-wasmsimd-x86-rr2-p6-x24.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24():
      89  v128_t vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi), vzGHIJ);  (local)
     102  vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo), vtGHIJ);
     115  vtGHIJ = wasm_v128_andnot(vtGHIJ, vsatmGHIJ);
     116  v128_t vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtGHIJ), vc5);
     124  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc4);
     131  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc3);
     138  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc2);
     145  vpGHIJ = wasm_f32x4_mul(vpGHIJ, vtGHIJ);
     156  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
     165  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vtGHIJ);
|
D | velu-neon-rr2-p6-x24.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x24():
      83  float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vminus_ln2_hi);  (local)
      90  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
      97  float32x4_t vpGHIJ = vmlaq_f32(vc5, vc6, vtGHIJ);
     104  vpGHIJ = vmlaq_f32(vc4, vpGHIJ, vtGHIJ);
     111  vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ);
     118  vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ);
     125  vpGHIJ = vmulq_f32(vpGHIJ, vtGHIJ);
     136  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
     145  vpGHIJ = vmlaq_f32(vtGHIJ, vpGHIJ, vtGHIJ);
|
D | velu-sse2-rr2-p6-x20.c | in xnn_f32_velu_ukernel__sse2_rr2_p6_x20():
      80  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);  (local)
      86  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
      92  __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc6, vtGHIJ), vc5);
      98  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc4);
     104  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3);
     110  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2);
     116  vpGHIJ = _mm_mul_ps(vpGHIJ, vtGHIJ);
     126  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
     133  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vtGHIJ);
|
D | velu-sse41-rr2-p6-x24.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x24():
      85  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);  (local)
      92  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
      99  __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc6, vtGHIJ), vc5);
     106  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc4);
     113  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3);
     120  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2);
     127  vpGHIJ = _mm_mul_ps(vpGHIJ, vtGHIJ);
     138  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
     147  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vtGHIJ);
|
D | velu-wasmsimd-arm-rr2-p6-x24.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24():
      85  v128_t vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi), vzGHIJ);  (local)
      92  vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo), vtGHIJ);
      99  v128_t vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtGHIJ), vc5);
     106  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc4);
     113  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc3);
     120  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc2);
     127  vpGHIJ = wasm_f32x4_mul(vpGHIJ, vtGHIJ);
     138  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
     147  vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vtGHIJ);
|
D | velu-sse2-rr2-p6-x24.c | in xnn_f32_velu_ukernel__sse2_rr2_p6_x24():
      85  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);  (local)
      92  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
      99  __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc6, vtGHIJ), vc5);
     106  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc4);
     113  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3);
     120  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2);
     127  vpGHIJ = _mm_mul_ps(vpGHIJ, vtGHIJ);
     138  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
     147  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vtGHIJ);
|
D | velu-neonfma-rr1-p6-x20.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20():
      77  float32x4_t vtGHIJ = vfmaq_f32(vzGHIJ, vnGHIJ, vminus_ln2);  (local)
      83  float32x4_t vpGHIJ = vfmaq_f32(vc5, vc6, vtGHIJ);
      89  vpGHIJ = vfmaq_f32(vc4, vpGHIJ, vtGHIJ);
      95  vpGHIJ = vfmaq_f32(vc3, vpGHIJ, vtGHIJ);
     101  vpGHIJ = vfmaq_f32(vc2, vpGHIJ, vtGHIJ);
     107  vpGHIJ = vmulq_f32(vpGHIJ, vtGHIJ);
     117  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
     124  vpGHIJ = vfmaq_f32(vtGHIJ, vpGHIJ, vtGHIJ);
|
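The f32-velu entries above all trace the same lane group (GHIJ) through one step sequence: a two-constant range reduction against ln2 (a single fused vminus_ln2 in the rr1 neonfma variant), a degree-6 polynomial evaluated by Horner's rule, and a final reassociation with the scale s = 2**n. As a minimal scalar sketch of that sequence; the ln2 hi/lo split and the Taylor coefficients below are illustrative stand-ins, not the minimax constants the kernels load from their params, and saturation handling (the wasm_v128_andnot masking of vtGHIJ in the x86 wasmsimd variants) is omitted:

    #include <math.h>

    /* Scalar sketch of one lane of the rr2-p6 ELU kernels:
     * alpha * (exp(x) - 1) for x <= 0. Coefficients are Taylor
     * stand-ins (1/2 ... 1/720), not XNNPACK's minimax values. */
    static float elu_rr2_p6_sketch(float x, float alpha) {
      const float minus_ln2_hi = -0.693145751953125f;  /* assumed hi/lo split of ln2 */
      const float minus_ln2_lo = -1.42860677e-6f;
      const float n = rintf(x * 1.44269504f);          /* round(x / ln2) */
      const float s = ldexpf(1.0f, (int) n);           /* s = 2^n; the kernels build this in the exponent field */
      float t = n * minus_ln2_hi + x;                  /* range reduction: x = n*ln2 + t */
      t = n * minus_ln2_lo + t;                        /* second ("rr2") correction step */

      float p = (1.0f / 720.0f) * t + (1.0f / 120.0f); /* c6*t + c5 */
      p = p * t + (1.0f / 24.0f);                      /* ... + c4 */
      p = p * t + (1.0f / 6.0f);                       /* ... + c3 */
      p = p * t + 0.5f;                                /* ... + c2 */
      p = p * t;                                       /* p = t*(c2 + c3*t + ... + c6*t^4) */

      t = t * s;                                       /* the "vt *= vs" line */
      p = p * t + t;                                   /* p = s*t + s*t^2*(c2 + ...) */
      return alpha * (p + (s - 1.0f));                 /* exp(x) - 1 = (s - 1) + p */
    }

The per-kernel listings differ only in intrinsic spelling (wasm_f32x4_add/mul, _mm_add_ps/_mm_mul_ps, vmlaq_f32/vfmaq_f32) and in how many lane groups they unroll (x20 vs. x24); the dataflow through vtGHIJ and vpGHIJ is the one sketched here.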
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neon-rr2-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20():
      76  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);  (local)
      82  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
      88  float32x4_t vpGHIJ = vmlaq_f32(vc4, vc5, vtGHIJ);
      94  vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ);
     100  vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ);
     106  vpGHIJ = vmlaq_f32(vc1, vpGHIJ, vtGHIJ);
     112  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
     118  float32x4_t vfGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);
|
D | wasmsimd-rr2-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2():
      86  v128_t vtGHIJ = wasm_f32x4_add(vxGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi));  (local)
      92  vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo));
      99  v128_t vpGHIJ = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vtGHIJ));
     105  vpGHIJ = wasm_f32x4_add(vc3, wasm_f32x4_mul(vpGHIJ, vtGHIJ));
     111  vpGHIJ = wasm_f32x4_add(vc2, wasm_f32x4_mul(vpGHIJ, vtGHIJ));
     117  vpGHIJ = wasm_f32x4_add(vc1, wasm_f32x4_mul(vpGHIJ, vtGHIJ));
     127  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
     133  v128_t vfGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ));
|
D | sse2-rr2-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2():
      86  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ);  (local)
      92  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
      99  __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4);
     105  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3);
     111  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2);
     117  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1);
     127  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
     133  __m128 vfGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ);
|
D | wasmsimd-rr2-p5-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5():
      89  v128_t vtGHIJ = wasm_f32x4_add(vxGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi));  (local)
      95  vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo));
     102  v128_t vpGHIJ = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vtGHIJ));
     108  vpGHIJ = wasm_f32x4_add(vc3, wasm_f32x4_mul(vpGHIJ, vtGHIJ));
     114  vpGHIJ = wasm_f32x4_add(vc2, wasm_f32x4_mul(vpGHIJ, vtGHIJ));
     120  vpGHIJ = wasm_f32x4_add(vc1, wasm_f32x4_mul(vpGHIJ, vtGHIJ));
     130  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
     136  v128_t vfGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ));
|
D | sse2-rr2-p5-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5():
      89  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ);  (local)
      95  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
     102  __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4);
     108  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3);
     114  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2);
     120  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1);
     130  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
     136  __m128 vfGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ);
|
D | neon-rr2-p5-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5():
      80  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);  (local)
      86  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
      92  float32x4_t vpGHIJ = vmlaq_f32(vc4, vc5, vtGHIJ);
      98  vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ);
     104  vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ);
     110  vpGHIJ = vmlaq_f32(vc1, vpGHIJ, vtGHIJ);
     116  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
     122  float32x4_t vfGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);
|
D | sse2-rr2-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20():
      85  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ);  (local)
      91  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
      98  __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4);
     104  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3);
     110  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2);
     116  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1);
     126  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
     132  __m128 vfGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ);
|
D | neon-rr2-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2():
      77  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);  (local)
      83  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
      89  float32x4_t vpGHIJ = vmlaq_f32(vc4, vc5, vtGHIJ);
      95  vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ);
     101  vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ);
     107  vpGHIJ = vmlaq_f32(vc1, vpGHIJ, vtGHIJ);
     113  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
     119  float32x4_t vfGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);
|
D | wasmsimd-rr2-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20():
      85  v128_t vtGHIJ = wasm_f32x4_add(vxGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi));  (local)
      91  vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo));
      98  v128_t vpGHIJ = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vtGHIJ));
     104  vpGHIJ = wasm_f32x4_add(vc3, wasm_f32x4_mul(vpGHIJ, vtGHIJ));
     110  vpGHIJ = wasm_f32x4_add(vc2, wasm_f32x4_mul(vpGHIJ, vtGHIJ));
     116  vpGHIJ = wasm_f32x4_add(vc1, wasm_f32x4_mul(vpGHIJ, vtGHIJ));
     126  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
     132  v128_t vfGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ));
|
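The f32-raddstoreexpminusmax entries repeat the same rr2 reduction with a degree-5 polynomial: each lane computes vf = exp(x - max), which the kernel stores and adds into one or more running sums (the acc2/acc5 variants keep 2 or 5 partial accumulators to shorten the reduction's dependency chain). A scalar sketch of the per-lane math, again with Taylor stand-in coefficients rather than the kernels' minimax constants:

    #include <math.h>

    /* Scalar sketch of the rr2-p5 step sequence in the
     * raddstoreexpminusmax kernels: f = exp(x - max) for x <= max.
     * Coefficients c1..c5 are Taylor stand-ins. */
    static float expminusmax_rr2_p5_sketch(float x, float max) {
      const float minus_ln2_hi = -0.693145751953125f;  /* assumed hi/lo split of ln2 */
      const float minus_ln2_lo = -1.42860677e-6f;
      const float vx = x - max;                        /* vx <= 0, so exp does not overflow */
      const float n = rintf(vx * 1.44269504f);
      const float s = ldexpf(1.0f, (int) n);           /* s = 2^n */
      float t = n * minus_ln2_hi + vx;                 /* the (local) vtGHIJ line */
      t = n * minus_ln2_lo + t;

      float p = (1.0f / 120.0f) * t + (1.0f / 24.0f);  /* c5*t + c4 */
      p = p * t + (1.0f / 6.0f);                       /* ... + c3 */
      p = p * t + 0.5f;                                /* ... + c2 */
      p = p * t + 1.0f;                                /* ... + c1 */

      t = t * s;                                       /* vt *= vs */
      return p * t + s;                                /* f = s*(1 + t*(c1 + c2*t + ...)) ~ s*exp(t) */
    }

The final vfGHIJ line in each listing is exactly this last fused step: the add-of-s folds the leading 1 of exp(t) into the scale.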
/external/XNNPACK/src/f32-vsigmoid/gen/ |
D | vsigmoid-wasmsimd-rr2-p5-div-x20.c | in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20():
      74  v128_t vtGHIJ = wasm_f32x4_add(vzGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_hi));  (local)
      80  vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_lo));
      86  v128_t vpGHIJ = wasm_f32x4_add(vc4, wasm_f32x4_mul(vtGHIJ, vc5));
      92  vpGHIJ = wasm_f32x4_add(vc3, wasm_f32x4_mul(vtGHIJ, vpGHIJ));
      98  vpGHIJ = wasm_f32x4_add(vc2, wasm_f32x4_mul(vtGHIJ, vpGHIJ));
     104  vpGHIJ = wasm_f32x4_add(vc1, wasm_f32x4_mul(vtGHIJ, vpGHIJ));
     110  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
     116  const v128_t veGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ));
|
D | vsigmoid-sse41-rr2-p5-div-x20.c | in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20():
      74  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);  (local)
      80  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
      86  __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4);
      92  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3);
      98  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2);
     104  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1);
     110  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
     116  __m128 veGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ);
|
D | vsigmoid-sse2-rr2-p5-div-x20.c | in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20():
      74  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);  (local)
      80  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
      86  __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4);
      92  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3);
      98  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2);
     104  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1);
     110  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
     116  __m128 veGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ);
|
D | vsigmoid-sse41-rr2-p5-div-x24.c | in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24():
      79  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);  (local)
      86  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
      93  __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4);
     100  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3);
     107  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2);
     114  vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1);
     121  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
     128  __m128 veGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ);
|
D | vsigmoid-neon-rr2-p5-nr2recps-x20.c | in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20():
      73  float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vln2_hi);  (local)
      79  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vln2_lo);
      85  float32x4_t vpGHIJ = vmlaq_f32(vc4, vc5, vtGHIJ);
      91  vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ);
      97  vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ);
     103  vpGHIJ = vmlaq_f32(vc1, vpGHIJ, vtGHIJ);
     109  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
     115  const float32x4_t veGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);
|
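The f32-vsigmoid entries reuse the same rr2-p5 exponential to produce veGHIJ = exp(z) for a reflected argument z, then form the sigmoid from it: the -div- kernels with a division, and the neon nr2recps variant with a reciprocal estimate refined by two Newton-Raphson steps (hence the name). A scalar sketch of the div flavor, under the same stand-in constants as above; the reflection step for positive inputs is an assumption based on the usual structure of these kernels, not shown in the matches:

    #include <math.h>

    /* Scalar sketch of the rr2-p5-div sigmoid kernels: evaluate
     * e = exp(z) for z = -|x| with the rr2-p5 steps, then divide. */
    static float sigmoid_rr2_p5_div_sketch(float x) {
      const float minus_ln2_hi = -0.693145751953125f;  /* assumed hi/lo split of ln2 */
      const float minus_ln2_lo = -1.42860677e-6f;
      const float z = -fabsf(x);                       /* reduce to the non-positive half */
      const float n = rintf(z * 1.44269504f);
      const float s = ldexpf(1.0f, (int) n);
      float t = n * minus_ln2_hi + z;                  /* the (local) vtGHIJ line */
      t = n * minus_ln2_lo + t;

      float p = (1.0f / 120.0f) * t + (1.0f / 24.0f);  /* Taylor stand-ins for c5..c1 */
      p = p * t + (1.0f / 6.0f);
      p = p * t + 0.5f;
      p = p * t + 1.0f;

      t = t * s;
      const float e = p * t + s;                       /* the veGHIJ line: e ~ exp(-|x|) */
      const float f = e / (e + 1.0f);                  /* sigmoid(-|x|) */
      return x > 0.0f ? 1.0f - f : f;                  /* reflect for positive inputs */
    }

Since z <= 0 here, e stays in (0, 1] and the division never overflows; that is why these kernels reduce to a reflected argument before calling the shared exp steps.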