/external/XNNPACK/src/f32-velu/gen/ |
D | velu-neonfma-rr1-p6-x20.c | 84 float32x4_t vpGHIJ = vfmaq_f32(vc5, vc6, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() local 90 vpGHIJ = vfmaq_f32(vc4, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 96 vpGHIJ = vfmaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 102 vpGHIJ = vfmaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 108 vpGHIJ = vmulq_f32(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 125 vpGHIJ = vfmaq_f32(vtGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 131 const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
|
D | velu-wasmsimd-arm-rr2-p6-x20.c | 93 v128_t vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() local 99 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 105 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 111 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 117 vpGHIJ = wasm_f32x4_mul(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 134 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 140 const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
|
D | velu-sse41-rr2-p6-x20.c | 93 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() local 99 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 105 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 111 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 117 vpGHIJ = _mm_mul_ps(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 134 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 140 const __m128 veGHIJ = _mm_mul_ps(_mm_add_ps(vpGHIJ, vsGHIJ), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
|
D | velu-neon-rr2-p6-x20.c | 91 float32x4_t vpGHIJ = vmlaq_f32(vc5, vc6, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() local 97 vpGHIJ = vmlaq_f32(vc4, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 103 vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 109 vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 115 vpGHIJ = vmulq_f32(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 132 vpGHIJ = vmlaq_f32(vtGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 138 const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
|
D | velu-neon-rr2-p6-x24.c | 98 float32x4_t vpGHIJ = vmlaq_f32(vc5, vc6, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() local 105 vpGHIJ = vmlaq_f32(vc4, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 112 vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 119 vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 126 vpGHIJ = vmulq_f32(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 146 vpGHIJ = vmlaq_f32(vtGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 153 const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
|
D | velu-wasmsimd-x86-rr2-p6-x20.c | 108 v128_t vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() local 114 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 120 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 126 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 132 vpGHIJ = wasm_f32x4_mul(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 149 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 155 const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
|
D | velu-sse2-rr2-p6-x20.c | 93 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() local 99 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() 105 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() 111 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() 117 vpGHIJ = _mm_mul_ps(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() 134 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() 140 const __m128 veGHIJ = _mm_mul_ps(_mm_add_ps(vpGHIJ, vsGHIJ), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20()
|
D | velu-neonfma-rr1-p6-x24.c | 90 float32x4_t vpGHIJ = vfmaq_f32(vc5, vc6, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() local 97 vpGHIJ = vfmaq_f32(vc4, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 104 vpGHIJ = vfmaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 111 vpGHIJ = vfmaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 118 vpGHIJ = vmulq_f32(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 138 vpGHIJ = vfmaq_f32(vtGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 145 const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24()
|
D | velu-sse41-rr2-p6-x24.c | 100 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() local 107 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 114 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 121 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 128 vpGHIJ = _mm_mul_ps(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 148 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 155 const __m128 veGHIJ = _mm_mul_ps(_mm_add_ps(vpGHIJ, vsGHIJ), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
|
D | velu-wasmsimd-arm-rr2-p6-x24.c | 100 v128_t vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() local 107 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 114 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 121 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 128 vpGHIJ = wasm_f32x4_mul(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 148 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 155 const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
|
D | velu-wasmsimd-x86-rr2-p6-x24.c | 117 v128_t vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() local 125 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 132 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 139 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 146 vpGHIJ = wasm_f32x4_mul(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 166 vpGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 173 const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
|
D | velu-sse2-rr2-p6-x24.c | 100 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc6, vtGHIJ), vc5); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() local 107 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc4); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 114 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 121 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 128 vpGHIJ = _mm_mul_ps(vpGHIJ, vtGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 148 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vtGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 155 const __m128 veGHIJ = _mm_mul_ps(_mm_add_ps(vpGHIJ, vsGHIJ), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neonfma-p5-x20.c | 104 float32x4_t vpGHIJ = vfmaq_f32(vc4, vc5, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() local 110 vpGHIJ = vfmaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() 116 vpGHIJ = vfmaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() 122 vpGHIJ = vfmaq_f32(vc1, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() 138 float32x4_t vfGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20()
|
D | wasmsimd-p5-x20-acc5.c | 105 v128_t vpGHIJ = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5() local 111 vpGHIJ = wasm_f32x4_add(vc3, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5() 117 vpGHIJ = wasm_f32x4_add(vc2, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5() 123 vpGHIJ = wasm_f32x4_add(vc1, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5() 139 v128_t vfGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5()
|
D | neon-p5-x20-acc5.c | 109 float32x4_t vpGHIJ = vmlaq_f32(vc4, vc5, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() local 115 vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 121 vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 127 vpGHIJ = vmlaq_f32(vc1, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 143 float32x4_t vfGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
|
D | neonfma-p5-x20-acc5.c | 108 float32x4_t vpGHIJ = vfmaq_f32(vc4, vc5, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() local 114 vpGHIJ = vfmaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() 120 vpGHIJ = vfmaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() 126 vpGHIJ = vfmaq_f32(vc1, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() 142 float32x4_t vfGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5()
|
D | sse2-p5-x20.c | 101 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() local 107 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() 113 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() 119 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() 135 __m128 vfGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
|
D | neon-p5-x20.c | 105 float32x4_t vpGHIJ = vmlaq_f32(vc4, vc5, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() local 111 vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 117 vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 123 vpGHIJ = vmlaq_f32(vc1, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 139 float32x4_t vfGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
|
D | neon-p5-x20-acc2.c | 106 float32x4_t vpGHIJ = vmlaq_f32(vc4, vc5, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() local 112 vpGHIJ = vmlaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 118 vpGHIJ = vmlaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 124 vpGHIJ = vmlaq_f32(vc1, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 140 float32x4_t vfGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
|
D | sse2-p5-x20-acc2.c | 102 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() local 108 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() 114 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() 120 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() 136 __m128 vfGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
|
D | neonfma-p5-x20-acc2.c | 105 float32x4_t vpGHIJ = vfmaq_f32(vc4, vc5, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() local 111 vpGHIJ = vfmaq_f32(vc3, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() 117 vpGHIJ = vfmaq_f32(vc2, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() 123 vpGHIJ = vfmaq_f32(vc1, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() 139 float32x4_t vfGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2()
|
D | wasmsimd-p5-x20-acc2.c | 102 v128_t vpGHIJ = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc2() local 108 vpGHIJ = wasm_f32x4_add(vc3, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc2() 114 vpGHIJ = wasm_f32x4_add(vc2, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc2() 120 vpGHIJ = wasm_f32x4_add(vc1, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc2() 136 v128_t vfGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc2()
|
D | wasmsimd-p5-x20.c | 101 v128_t vpGHIJ = wasm_f32x4_add(vc4, wasm_f32x4_mul(vc5, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20() local 107 vpGHIJ = wasm_f32x4_add(vc3, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20() 113 vpGHIJ = wasm_f32x4_add(vc2, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20() 119 vpGHIJ = wasm_f32x4_add(vc1, wasm_f32x4_mul(vpGHIJ, vtGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20() 135 v128_t vfGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20()
|
D | sse2-p5-x20-acc5.c | 105 __m128 vpGHIJ = _mm_add_ps(_mm_mul_ps(vc5, vtGHIJ), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() local 111 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() 117 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() 123 vpGHIJ = _mm_add_ps(_mm_mul_ps(vpGHIJ, vtGHIJ), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() 139 __m128 vfGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | wasmsimd-p5-div-x20.c | 86 v128_t vpGHIJ = wasm_f32x4_add(vc4, wasm_f32x4_mul(vtGHIJ, vc5)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() local 92 vpGHIJ = wasm_f32x4_add(vc3, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 98 vpGHIJ = wasm_f32x4_add(vc2, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 104 vpGHIJ = wasm_f32x4_add(vc1, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 116 const v128_t veGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20()
|