/external/XNNPACK/src/f32-velu/gen/ |
D | velu-neonfma-rr1-p6-x8.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() (vp0123 local):
     62  float32x4_t vp0123 = vfmaq_f32(vc5, vc6, vt0123);
     65  vp0123 = vfmaq_f32(vc4, vp0123, vt0123);
     68  vp0123 = vfmaq_f32(vc3, vp0123, vt0123);
     71  vp0123 = vfmaq_f32(vc2, vp0123, vt0123);
     74  vp0123 = vmulq_f32(vp0123, vt0123);
     82  vp0123 = vfmaq_f32(vt0123, vp0123, vt0123);
     85  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);
|
D | velu-neon-rr2-p6-x8.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x8() (vp0123 local):
     66  float32x4_t vp0123 = vmlaq_f32(vc5, vc6, vt0123);
     69  vp0123 = vmlaq_f32(vc4, vp0123, vt0123);
     72  vp0123 = vmlaq_f32(vc3, vp0123, vt0123);
     75  vp0123 = vmlaq_f32(vc2, vp0123, vt0123);
     78  vp0123 = vmulq_f32(vp0123, vt0123);
     86  vp0123 = vmlaq_f32(vt0123, vp0123, vt0123);
     89  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);
|
D | velu-sse41-rr2-p6-x8.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() (vp0123 local):
     68  __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc6, vt0123), vc5);
     71  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc4);
     74  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3);
     77  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2);
     80  vp0123 = _mm_mul_ps(vp0123, vt0123);
     88  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vt0123);
     91  const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha);
|
D | velu-wasmsimd-arm-rr2-p6-x8.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() (vp0123 local):
     68  v128_t vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt0123), vc5);
     71  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc4);
     74  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc3);
     77  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc2);
     80  vp0123 = wasm_f32x4_mul(vp0123, vt0123);
     88  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vt0123);
     91  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);
|
D | velu-neonfma-rr1-p6-x12.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() (vp0123 local):
     68  float32x4_t vp0123 = vfmaq_f32(vc5, vc6, vt0123);
     72  vp0123 = vfmaq_f32(vc4, vp0123, vt0123);
     76  vp0123 = vfmaq_f32(vc3, vp0123, vt0123);
     80  vp0123 = vfmaq_f32(vc2, vp0123, vt0123);
     84  vp0123 = vmulq_f32(vp0123, vt0123);
     95  vp0123 = vfmaq_f32(vt0123, vp0123, vt0123);
     99  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);
|
D | velu-sse2-rr2-p6-x8.c | in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() (vp0123 local):
     68  __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc6, vt0123), vc5);
     71  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc4);
     74  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3);
     77  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2);
     80  vp0123 = _mm_mul_ps(vp0123, vt0123);
     88  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vt0123);
     91  const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha);
|
D | velu-wasmsimd-x86-rr2-p6-x8.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() (vp0123 local):
     73  v128_t vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt0123), vc5);
     77  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc4);
     80  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc3);
     83  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc2);
     86  vp0123 = wasm_f32x4_mul(vp0123, vt0123);
     94  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vt0123);
     97  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);
|
D | velu-neon-rr2-p6-x12.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x12() (vp0123 local):
     73  float32x4_t vp0123 = vmlaq_f32(vc5, vc6, vt0123);
     77  vp0123 = vmlaq_f32(vc4, vp0123, vt0123);
     81  vp0123 = vmlaq_f32(vc3, vp0123, vt0123);
     85  vp0123 = vmlaq_f32(vc2, vp0123, vt0123);
     89  vp0123 = vmulq_f32(vp0123, vt0123);
    100  vp0123 = vmlaq_f32(vt0123, vp0123, vt0123);
    104  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);
|
D | velu-wasmsimd-arm-rr2-p6-x12.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() (vp0123 local):
     75  v128_t vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt0123), vc5);
     79  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc4);
     83  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc3);
     87  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc2);
     91  vp0123 = wasm_f32x4_mul(vp0123, vt0123);
    102  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vt0123);
    106  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);
|
D | velu-sse2-rr2-p6-x12.c | in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() (vp0123 local):
     75  __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc6, vt0123), vc5);
     79  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc4);
     83  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3);
     87  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2);
     91  vp0123 = _mm_mul_ps(vp0123, vt0123);
    102  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vt0123);
    106  const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha);
|
D | velu-sse41-rr2-p6-x12.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() (vp0123 local):
     75  __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc6, vt0123), vc5);
     79  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc4);
     83  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3);
     87  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2);
     91  vp0123 = _mm_mul_ps(vp0123, vt0123);
    102  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vt0123);
    106  const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha);
|
D | velu-neonfma-rr1-p6-x16.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() (vp0123 local):
     74  float32x4_t vp0123 = vfmaq_f32(vc5, vc6, vt0123);
     79  vp0123 = vfmaq_f32(vc4, vp0123, vt0123);
     84  vp0123 = vfmaq_f32(vc3, vp0123, vt0123);
     89  vp0123 = vfmaq_f32(vc2, vp0123, vt0123);
     94  vp0123 = vmulq_f32(vp0123, vt0123);
    108  vp0123 = vfmaq_f32(vt0123, vp0123, vt0123);
    113  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);
|
D | velu-wasmsimd-arm-rr2-p6-x16.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() (vp0123 local):
     82  v128_t vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt0123), vc5);
     87  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc4);
     92  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc3);
     97  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc2);
    102  vp0123 = wasm_f32x4_mul(vp0123, vt0123);
    116  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vt0123);
    121  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);
|
D | velu-wasmsimd-x86-rr2-p6-x12.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() (vp0123 local):
     82  v128_t vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt0123), vc5);
     88  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc4);
     92  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc3);
     96  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vc2);
    100  vp0123 = wasm_f32x4_mul(vp0123, vt0123);
    111  vp0123 = wasm_f32x4_add(wasm_f32x4_mul(vp0123, vt0123), vt0123);
    115  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);
|
D | velu-sse41-rr2-p6-x16.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() (vp0123 local):
     82  __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc6, vt0123), vc5);
     87  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc4);
     92  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3);
     97  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2);
    102  vp0123 = _mm_mul_ps(vp0123, vt0123);
    116  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vt0123);
    121  const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha);
|
D | velu-neon-rr2-p6-x16.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x16() (vp0123 local):
     80  float32x4_t vp0123 = vmlaq_f32(vc5, vc6, vt0123);
     85  vp0123 = vmlaq_f32(vc4, vp0123, vt0123);
     90  vp0123 = vmlaq_f32(vc3, vp0123, vt0123);
     95  vp0123 = vmlaq_f32(vc2, vp0123, vt0123);
    100  vp0123 = vmulq_f32(vp0123, vt0123);
    114  vp0123 = vmlaq_f32(vt0123, vp0123, vt0123);
    119  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);
|
D | velu-neonfma-rr1-p6-x20.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() (vp0123 local):
     80  float32x4_t vp0123 = vfmaq_f32(vc5, vc6, vt0123);
     86  vp0123 = vfmaq_f32(vc4, vp0123, vt0123);
     92  vp0123 = vfmaq_f32(vc3, vp0123, vt0123);
     98  vp0123 = vfmaq_f32(vc2, vp0123, vt0123);
    104  vp0123 = vmulq_f32(vp0123, vt0123);
    121  vp0123 = vfmaq_f32(vt0123, vp0123, vt0123);
    127  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);
|
D | velu-sse2-rr2-p6-x16.c | in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() (vp0123 local):
     82  __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc6, vt0123), vc5);
     87  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc4);
     92  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3);
     97  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2);
    102  vp0123 = _mm_mul_ps(vp0123, vt0123);
    116  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vt0123);
    121  const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha);
|
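All of the velu p6 kernels above evaluate the same degree-6 polynomial by Horner's scheme; they differ only in the ISA's multiply-add form (fused vfmaq_f32, NEON vmlaq_f32, or a separate multiply and add on SSE and WASM SIMD; the fma-capable neonfma variants also pair with the single-constant rr1 range reduction where the others use rr2). Below is a scalar sketch of that shared sequence; the function name velu_p6_tail is hypothetical, and c6..c2 stand in for the kernels' vc6..vc2 constants, whose actual values live in XNNPACK's parameter structs and are not reproduced here.

    /* Hypothetical scalar sketch of the Horner evaluation shared by the
       velu p6 kernels above.  c6..c2 are placeholders for the vc6..vc2
       vector constants; t, s, and alpha mirror vt, vs, and valpha. */
    static inline float velu_p6_tail(float t, float s, float alpha,
                                     float c6, float c5, float c4,
                                     float c3, float c2) {
      float p = c6 * t + c5;   /* vp = fma(vc5, vc6, vt)   */
      p = p * t + c4;          /* vp = fma(vc4, vp, vt)    */
      p = p * t + c3;          /* vp = fma(vc3, vp, vt)    */
      p = p * t + c2;          /* vp = fma(vc2, vp, vt)    */
      p = p * t;               /* vp = vp * vt             */
      p = p * t + t;           /* vp = fma(vt, vp, vt)     */
      return (p + s) * alpha;  /* ve = (vp + vs) * valpha  */
    }

On NEON with FMA each step compiles to a single vfmaq_f32; the SSE2/SSE4.1 and WASM SIMD variants spell the same step as an unfused multiply followed by an add.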
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | wasmsimd-p5-x4.c | in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x4() (vp0123 local):
     69  v128_t vp0123 = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vt0123));
     71  vp0123 = wasm_f32x4_add(vc3, wasm_f32x4_mul(vp0123, vt0123));
     73  vp0123 = wasm_f32x4_add(vc2, wasm_f32x4_mul(vp0123, vt0123));
     75  vp0123 = wasm_f32x4_add(vc1, wasm_f32x4_mul(vp0123, vt0123));
     83  v128_t vf0123 = wasm_f32x4_add(vs0123, wasm_f32x4_mul(vt0123, vp0123));
|
D | sse2-p5-x4.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() (vp0123 local):
     69  __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4);
     71  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3);
     73  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2);
     75  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1);
     83  __m128 vf0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123);
|
D | sse2-p5-x8-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() (vp0123 local):
     77  __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4);
     80  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3);
     83  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2);
     86  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1);
     96  __m128 vf0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123);
|
D | neonfma-p5-x8-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() (vp0123 local):
     80  float32x4_t vp0123 = vfmaq_f32(vc4, vc5, vt0123);
     83  vp0123 = vfmaq_f32(vc3, vp0123, vt0123);
     86  vp0123 = vfmaq_f32(vc2, vp0123, vt0123);
     89  vp0123 = vfmaq_f32(vc1, vp0123, vt0123);
     99  float32x4_t vf0123 = vfmaq_f32(vs0123, vp0123, vt0123);
|
D | wasmsimd-p5-x8.c | in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x8() (vp0123 local):
     76  v128_t vp0123 = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vt0123));
     79  vp0123 = wasm_f32x4_add(vc3, wasm_f32x4_mul(vp0123, vt0123));
     82  vp0123 = wasm_f32x4_add(vc2, wasm_f32x4_mul(vp0123, vt0123));
     85  vp0123 = wasm_f32x4_add(vc1, wasm_f32x4_mul(vp0123, vt0123));
     95  v128_t vf0123 = wasm_f32x4_add(vs0123, wasm_f32x4_mul(vt0123, vp0123));
|
D | neon-p5-x8.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() (vp0123 local):
     80  float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123);
     83  vp0123 = vmlaq_f32(vc3, vp0123, vt0123);
     86  vp0123 = vmlaq_f32(vc2, vp0123, vt0123);
     89  vp0123 = vmlaq_f32(vc1, vp0123, vt0123);
     99  float32x4_t vf0123 = vmlaq_f32(vs0123, vp0123, vt0123);
|
D | sse2-p5-x8.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() (vp0123 local):
     76  __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4);
     79  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3);
     82  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2);
     85  vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1);
     95  __m128 vf0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123);
|
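The raddstoreexpminusmax p5 kernels share the same Horner shape with a degree-5 polynomial: four multiply-add steps through the vc4..vc1 constants, then a final multiply-add against the scale vs. A hypothetical scalar sketch under the same caveats as above (expminusmax_p5_tail is a made-up name; c5..c1 stand in for vc5..vc1):

    /* Hypothetical scalar sketch of the p5 Horner evaluation shared by the
       raddstoreexpminusmax kernels above.  c5..c1 are placeholders for the
       vc5..vc1 vector constants; t and s mirror vt and vs. */
    static inline float expminusmax_p5_tail(float t, float s,
                                            float c5, float c4, float c3,
                                            float c2, float c1) {
      float p = c5 * t + c4;   /* vp = fma(vc4, vc5, vt)  */
      p = p * t + c3;          /* vp = fma(vc3, vp, vt)   */
      p = p * t + c2;          /* vp = fma(vc2, vp, vt)   */
      p = p * t + c1;          /* vp = fma(vc1, vp, vt)   */
      return p * t + s;        /* vf = fma(vs, vp, vt)    */
    }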