/external/XNNPACK/src/f32-velu/gen/ |
D | velu-neonfma-rr1-p6-x8.c | 63 float32x4_t vp4567 = vfmaq_f32(vc5, vc6, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() local 66 vp4567 = vfmaq_f32(vc4, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() 69 vp4567 = vfmaq_f32(vc3, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() 72 vp4567 = vfmaq_f32(vc2, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() 75 vp4567 = vmulq_f32(vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() 83 vp4567 = vfmaq_f32(vt4567, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() 86 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8()
|
D | velu-neon-rr2-p6-x8.c | 67 float32x4_t vp4567 = vmlaq_f32(vc5, vc6, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() local 70 vp4567 = vmlaq_f32(vc4, vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() 73 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() 76 vp4567 = vmlaq_f32(vc2, vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() 79 vp4567 = vmulq_f32(vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() 87 vp4567 = vmlaq_f32(vt4567, vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() 90 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x8()
|
D | velu-sse41-rr2-p6-x8.c | 69 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc6, vt4567), vc5); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() local 72 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc4); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 75 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 78 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 81 vp4567 = _mm_mul_ps(vp4567, vt4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 89 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vt4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 92 const __m128 ve4567 = _mm_mul_ps(_mm_add_ps(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8()
|
D | velu-wasmsimd-arm-rr2-p6-x8.c | 69 v128_t vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt4567), vc5); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() local 72 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc4); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() 75 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc3); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() 78 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() 81 vp4567 = wasm_f32x4_mul(vp4567, vt4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() 89 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vt4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() 92 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8()
|
D | velu-neonfma-rr1-p6-x12.c | 69 float32x4_t vp4567 = vfmaq_f32(vc5, vc6, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() local 73 vp4567 = vfmaq_f32(vc4, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 77 vp4567 = vfmaq_f32(vc3, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 81 vp4567 = vfmaq_f32(vc2, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 85 vp4567 = vmulq_f32(vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 96 vp4567 = vfmaq_f32(vt4567, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 100 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12()
|
D | velu-sse2-rr2-p6-x8.c | 69 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc6, vt4567), vc5); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() local 72 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc4); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() 75 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() 78 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() 81 vp4567 = _mm_mul_ps(vp4567, vt4567); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() 89 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vt4567); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() 92 const __m128 ve4567 = _mm_mul_ps(_mm_add_ps(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8()
|
D | velu-wasmsimd-x86-rr2-p6-x8.c | 75 v128_t vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt4567), vc5); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() local 78 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc4); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 81 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc3); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 84 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc2); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 87 vp4567 = wasm_f32x4_mul(vp4567, vt4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 95 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vt4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 98 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8()
|
D | velu-neon-rr2-p6-x12.c | 74 float32x4_t vp4567 = vmlaq_f32(vc5, vc6, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() local 78 vp4567 = vmlaq_f32(vc4, vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 82 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 86 vp4567 = vmlaq_f32(vc2, vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 90 vp4567 = vmulq_f32(vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 101 vp4567 = vmlaq_f32(vt4567, vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 105 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x12()
|
D | velu-wasmsimd-arm-rr2-p6-x12.c | 76 v128_t vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt4567), vc5); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() local 80 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc4); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 84 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc3); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 88 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 92 vp4567 = wasm_f32x4_mul(vp4567, vt4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 103 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vt4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 107 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12()
|
D | velu-sse2-rr2-p6-x12.c | 76 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc6, vt4567), vc5); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() local 80 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc4); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 84 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 88 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 92 vp4567 = _mm_mul_ps(vp4567, vt4567); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 103 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vt4567); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 107 const __m128 ve4567 = _mm_mul_ps(_mm_add_ps(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12()
|
D | velu-sse41-rr2-p6-x12.c | 76 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc6, vt4567), vc5); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() local 80 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc4); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 84 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 88 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 92 vp4567 = _mm_mul_ps(vp4567, vt4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 103 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vt4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 107 const __m128 ve4567 = _mm_mul_ps(_mm_add_ps(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12()
|
D | velu-neonfma-rr1-p6-x16.c | 75 float32x4_t vp4567 = vfmaq_f32(vc5, vc6, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() local 80 vp4567 = vfmaq_f32(vc4, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 85 vp4567 = vfmaq_f32(vc3, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 90 vp4567 = vfmaq_f32(vc2, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 95 vp4567 = vmulq_f32(vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 109 vp4567 = vfmaq_f32(vt4567, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 114 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16()
|
D | velu-wasmsimd-arm-rr2-p6-x16.c | 83 v128_t vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt4567), vc5); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() local 88 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc4); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 93 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc3); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 98 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 103 vp4567 = wasm_f32x4_mul(vp4567, vt4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 117 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vt4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 122 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
|
D | velu-wasmsimd-x86-rr2-p6-x12.c | 84 v128_t vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt4567), vc5); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() local 89 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc4); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 93 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc3); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 97 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vc2); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 101 vp4567 = wasm_f32x4_mul(vp4567, vt4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 112 vp4567 = wasm_f32x4_add(wasm_f32x4_mul(vp4567, vt4567), vt4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 116 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12()
|
D | velu-sse41-rr2-p6-x16.c | 83 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc6, vt4567), vc5); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() local 88 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc4); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 93 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 98 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 103 vp4567 = _mm_mul_ps(vp4567, vt4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 117 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vt4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 122 const __m128 ve4567 = _mm_mul_ps(_mm_add_ps(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()
|
D | velu-neon-rr2-p6-x16.c | 81 float32x4_t vp4567 = vmlaq_f32(vc5, vc6, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() local 86 vp4567 = vmlaq_f32(vc4, vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 91 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 96 vp4567 = vmlaq_f32(vc2, vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 101 vp4567 = vmulq_f32(vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 115 vp4567 = vmlaq_f32(vt4567, vp4567, vt4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 120 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x16()
|
D | velu-neonfma-rr1-p6-x20.c | 81 float32x4_t vp4567 = vfmaq_f32(vc5, vc6, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() local 87 vp4567 = vfmaq_f32(vc4, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 93 vp4567 = vfmaq_f32(vc3, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 99 vp4567 = vfmaq_f32(vc2, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 105 vp4567 = vmulq_f32(vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 122 vp4567 = vfmaq_f32(vt4567, vp4567, vt4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 128 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
|
D | velu-sse2-rr2-p6-x16.c | 83 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc6, vt4567), vc5); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() local 88 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc4); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 93 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 98 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 103 vp4567 = _mm_mul_ps(vp4567, vt4567); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 117 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vt4567); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 122 const __m128 ve4567 = _mm_mul_ps(_mm_add_ps(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | sse2-p5-x8-acc2.c | 78 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() local 81 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 84 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 87 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 97 __m128 vf4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
|
D | neonfma-p5-x8-acc2.c | 81 float32x4_t vp4567 = vfmaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() local 84 vp4567 = vfmaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 87 vp4567 = vfmaq_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 90 vp4567 = vfmaq_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 100 float32x4_t vf4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
|
D | wasmsimd-p5-x8.c | 77 v128_t vp4567 = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vt4567)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x8() local 80 vp4567 = wasm_f32x4_add(vc3, wasm_f32x4_mul(vp4567, vt4567)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x8() 83 vp4567 = wasm_f32x4_add(vc2, wasm_f32x4_mul(vp4567, vt4567)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x8() 86 vp4567 = wasm_f32x4_add(vc1, wasm_f32x4_mul(vp4567, vt4567)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x8() 96 v128_t vf4567 = wasm_f32x4_add(vs4567, wasm_f32x4_mul(vt4567, vp4567)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x8()
|
D | neon-p5-x8.c | 81 float32x4_t vp4567 = vmlaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() local 84 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 87 vp4567 = vmlaq_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 90 vp4567 = vmlaq_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 100 float32x4_t vf4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
|
D | sse2-p5-x8.c | 77 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() local 80 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 83 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 86 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 96 __m128 vf4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
|
D | neon-p5-x8-acc2.c | 82 float32x4_t vp4567 = vmlaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() local 85 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 88 vp4567 = vmlaq_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 91 vp4567 = vmlaq_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 101 float32x4_t vf4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-div-x8.c | 57 float32x4_t vp4567 = vfmaq_f32(vc4, vc5, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() local 60 vp4567 = vfmaq_f32(vc3, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() 63 vp4567 = vfmaq_f32(vc2, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() 66 vp4567 = vfmaq_f32(vc1, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() 72 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
|