/external/XNNPACK/src/f32-velu/gen/ |
D | velu-neonfma-rr1-p6-x12.c | 70 float32x4_t vp89AB = vfmaq_f32(vc5, vc6, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() local 74 vp89AB = vfmaq_f32(vc4, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 78 vp89AB = vfmaq_f32(vc3, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 82 vp89AB = vfmaq_f32(vc2, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 86 vp89AB = vmulq_f32(vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 97 vp89AB = vfmaq_f32(vt89AB, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 101 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12()
|
D | velu-neon-rr2-p6-x12.c | 75 float32x4_t vp89AB = vmlaq_f32(vc5, vc6, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() local 79 vp89AB = vmlaq_f32(vc4, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 83 vp89AB = vmlaq_f32(vc3, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 87 vp89AB = vmlaq_f32(vc2, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 91 vp89AB = vmulq_f32(vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 102 vp89AB = vmlaq_f32(vt89AB, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 106 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x12()
|
D | velu-wasmsimd-arm-rr2-p6-x12.c | 77 v128_t vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt89AB), vc5); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() local 81 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc4); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 85 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc3); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 89 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 93 vp89AB = wasm_f32x4_mul(vp89AB, vt89AB); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 104 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vt89AB); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 108 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12()
|
D | velu-sse2-rr2-p6-x12.c | 77 __m128 vp89AB = _mm_add_ps(_mm_mul_ps(vc6, vt89AB), vc5); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() local 81 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc4); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 85 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc3); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 89 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc2); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 93 vp89AB = _mm_mul_ps(vp89AB, vt89AB); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 104 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vt89AB); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 108 const __m128 ve89AB = _mm_mul_ps(_mm_add_ps(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12()
|
D | velu-sse41-rr2-p6-x12.c | 77 __m128 vp89AB = _mm_add_ps(_mm_mul_ps(vc6, vt89AB), vc5); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() local 81 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc4); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 85 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc3); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 89 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc2); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 93 vp89AB = _mm_mul_ps(vp89AB, vt89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 104 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vt89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 108 const __m128 ve89AB = _mm_mul_ps(_mm_add_ps(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12()
|
D | velu-neonfma-rr1-p6-x16.c | 76 float32x4_t vp89AB = vfmaq_f32(vc5, vc6, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() local 81 vp89AB = vfmaq_f32(vc4, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 86 vp89AB = vfmaq_f32(vc3, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 91 vp89AB = vfmaq_f32(vc2, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 96 vp89AB = vmulq_f32(vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 110 vp89AB = vfmaq_f32(vt89AB, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 115 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16()
|
D | velu-wasmsimd-arm-rr2-p6-x16.c | 84 v128_t vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt89AB), vc5); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() local 89 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc4); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 94 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc3); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 99 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 104 vp89AB = wasm_f32x4_mul(vp89AB, vt89AB); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 118 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vt89AB); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 123 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
|
D | velu-wasmsimd-x86-rr2-p6-x12.c | 86 v128_t vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt89AB), vc5); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() local 90 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc4); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 94 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc3); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 98 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc2); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 102 vp89AB = wasm_f32x4_mul(vp89AB, vt89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 113 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vt89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 117 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12()
|
D | velu-sse41-rr2-p6-x16.c | 84 __m128 vp89AB = _mm_add_ps(_mm_mul_ps(vc6, vt89AB), vc5); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() local 89 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc4); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 94 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc3); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 99 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc2); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 104 vp89AB = _mm_mul_ps(vp89AB, vt89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 118 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vt89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 123 const __m128 ve89AB = _mm_mul_ps(_mm_add_ps(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()
|
D | velu-neon-rr2-p6-x16.c | 82 float32x4_t vp89AB = vmlaq_f32(vc5, vc6, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() local 87 vp89AB = vmlaq_f32(vc4, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 92 vp89AB = vmlaq_f32(vc3, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 97 vp89AB = vmlaq_f32(vc2, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 102 vp89AB = vmulq_f32(vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 116 vp89AB = vmlaq_f32(vt89AB, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 121 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x16()
|
D | velu-neonfma-rr1-p6-x20.c | 82 float32x4_t vp89AB = vfmaq_f32(vc5, vc6, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() local 88 vp89AB = vfmaq_f32(vc4, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 94 vp89AB = vfmaq_f32(vc3, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 100 vp89AB = vfmaq_f32(vc2, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 106 vp89AB = vmulq_f32(vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 123 vp89AB = vfmaq_f32(vt89AB, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 129 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
|
D | velu-sse2-rr2-p6-x16.c | 84 __m128 vp89AB = _mm_add_ps(_mm_mul_ps(vc6, vt89AB), vc5); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() local 89 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc4); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 94 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc3); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 99 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc2); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 104 vp89AB = _mm_mul_ps(vp89AB, vt89AB); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 118 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vt89AB); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 123 const __m128 ve89AB = _mm_mul_ps(_mm_add_ps(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16()
|
D | velu-wasmsimd-arm-rr2-p6-x20.c | 91 v128_t vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt89AB), vc5); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() local 97 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc4); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 103 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc3); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 109 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 115 vp89AB = wasm_f32x4_mul(vp89AB, vt89AB); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 132 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vt89AB); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 138 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
|
D | velu-sse41-rr2-p6-x20.c | 91 __m128 vp89AB = _mm_add_ps(_mm_mul_ps(vc6, vt89AB), vc5); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() local 97 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc4); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 103 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc3); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 109 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc2); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 115 vp89AB = _mm_mul_ps(vp89AB, vt89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 132 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vt89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 138 const __m128 ve89AB = _mm_mul_ps(_mm_add_ps(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
|
D | velu-neon-rr2-p6-x20.c | 89 float32x4_t vp89AB = vmlaq_f32(vc5, vc6, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() local 95 vp89AB = vmlaq_f32(vc4, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 101 vp89AB = vmlaq_f32(vc3, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 107 vp89AB = vmlaq_f32(vc2, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 113 vp89AB = vmulq_f32(vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 130 vp89AB = vmlaq_f32(vt89AB, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 136 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
|
D | velu-wasmsimd-x86-rr2-p6-x16.c | 95 v128_t vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt89AB), vc5); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() local 101 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc4); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 106 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc3); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 111 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc2); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 116 vp89AB = wasm_f32x4_mul(vp89AB, vt89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 130 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vt89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 135 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()
|
D | velu-neon-rr2-p6-x24.c | 96 float32x4_t vp89AB = vmlaq_f32(vc5, vc6, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() local 103 vp89AB = vmlaq_f32(vc4, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 110 vp89AB = vmlaq_f32(vc3, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 117 vp89AB = vmlaq_f32(vc2, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 124 vp89AB = vmulq_f32(vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 144 vp89AB = vmlaq_f32(vt89AB, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 151 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
|
D | velu-wasmsimd-x86-rr2-p6-x20.c | 104 v128_t vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt89AB), vc5); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() local 112 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc4); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 118 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc3); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 124 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vc2); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 130 vp89AB = wasm_f32x4_mul(vp89AB, vt89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 147 vp89AB = wasm_f32x4_add(wasm_f32x4_mul(vp89AB, vt89AB), vt89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 153 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
|
D | velu-sse2-rr2-p6-x20.c | 91 __m128 vp89AB = _mm_add_ps(_mm_mul_ps(vc6, vt89AB), vc5); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() local 97 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc4); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() 103 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc3); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() 109 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc2); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() 115 vp89AB = _mm_mul_ps(vp89AB, vt89AB); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() 132 vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vt89AB); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() 138 const __m128 ve89AB = _mm_mul_ps(_mm_add_ps(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20()
|
D | velu-neonfma-rr1-p6-x24.c | 88 float32x4_t vp89AB = vfmaq_f32(vc5, vc6, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() local 95 vp89AB = vfmaq_f32(vc4, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 102 vp89AB = vfmaq_f32(vc3, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 109 vp89AB = vfmaq_f32(vc2, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 116 vp89AB = vmulq_f32(vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 136 vp89AB = vfmaq_f32(vt89AB, vp89AB, vt89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 143 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | wasmsimd-p5-x12.c | 85 v128_t vp89AB = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vt89AB)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x12() local 89 vp89AB = wasm_f32x4_add(vc3, wasm_f32x4_mul(vp89AB, vt89AB)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x12() 93 vp89AB = wasm_f32x4_add(vc2, wasm_f32x4_mul(vp89AB, vt89AB)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x12() 97 vp89AB = wasm_f32x4_add(vc1, wasm_f32x4_mul(vp89AB, vt89AB)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x12() 109 v128_t vf89AB = wasm_f32x4_add(vs89AB, wasm_f32x4_mul(vt89AB, vp89AB)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x12()
|
D | neonfma-p5-x12.c | 88 float32x4_t vp89AB = vfmaq_f32(vc4, vc5, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() local 92 vp89AB = vfmaq_f32(vc3, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 96 vp89AB = vfmaq_f32(vc2, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 100 vp89AB = vfmaq_f32(vc1, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 112 float32x4_t vf89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12()
|
D | neon-p5-x12-acc2.c | 90 float32x4_t vp89AB = vmlaq_f32(vc4, vc5, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() local 94 vp89AB = vmlaq_f32(vc3, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 98 vp89AB = vmlaq_f32(vc2, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 102 vp89AB = vmlaq_f32(vc1, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 114 float32x4_t vf89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
|
D | neonfma-p5-x12-acc2.c | 89 float32x4_t vp89AB = vfmaq_f32(vc4, vc5, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() local 93 vp89AB = vfmaq_f32(vc3, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 97 vp89AB = vfmaq_f32(vc2, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 101 vp89AB = vfmaq_f32(vc1, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 113 float32x4_t vf89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2()
|
D | wasmsimd-p5-x12-acc2.c | 86 v128_t vp89AB = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vt89AB)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x12_acc2() local 90 vp89AB = wasm_f32x4_add(vc3, wasm_f32x4_mul(vp89AB, vt89AB)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x12_acc2() 94 vp89AB = wasm_f32x4_add(vc2, wasm_f32x4_mul(vp89AB, vt89AB)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x12_acc2() 98 vp89AB = wasm_f32x4_add(vc1, wasm_f32x4_mul(vp89AB, vt89AB)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x12_acc2() 110 v128_t vf89AB = wasm_f32x4_add(vs89AB, wasm_f32x4_mul(vt89AB, vp89AB)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x12_acc2()
|