/external/XNNPACK/src/f32-velu/gen/

D | velu-wasm-rr2-p6-x6.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x6():
     99  float vp5 = vc6 * vt5 + vc5;  (local)
    106  vp5 = vp5 * vt5 + vc4;
    113  vp5 = vp5 * vt5 + vc3;
    120  vp5 = vp5 * vt5 + vc2;
    127  vp5 *= vt5;
    147  vp5 = vp5 * vt5 + vt5;
    159  const float ve5 = (vp5 + vs5) * valpha;

D | velu-scalar-rr2-p6-x6.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x6():
    123  float vp5 = vc6 * vt5 + vc5;  (local)
    130  vp5 = vp5 * vt5 + vc4;
    137  vp5 = vp5 * vt5 + vc3;
    144  vp5 = vp5 * vt5 + vc2;
    151  vp5 *= vt5;
    171  vp5 = vp5 * vt5 + vt5;
    183  const float ve5 = (vp5 + vs5) * valpha;

D | velu-avx512f-rr1-p6-x96.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96():
     90  __m512 vp5 = _mm512_fmadd_ps(vc6, vt5, vc5);  (local)
     97  vp5 = _mm512_fmadd_ps(vp5, vt5, vc4);
    104  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    111  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    123  vp5 = _mm512_mul_ps(vp5, vt5);
    138  vp5 = _mm512_fmadd_ps(vp5, vt5, vt5);
    151  __m512 vy5 = _mm512_fmadd_ps(vp5, valpha, vs5);

D | velu-avx2-rr1-p6-x48.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x48():
     90  __m256 vp5 = _mm256_fmadd_ps(vc6, vt5, vc5);  (local)
     97  vp5 = _mm256_fmadd_ps(vp5, vt5, vc4);
    104  vp5 = _mm256_fmadd_ps(vp5, vt5, vc3);
    111  vp5 = _mm256_fmadd_ps(vp5, vt5, vc2);
    123  vp5 = _mm256_mul_ps(vp5, vt5);
    137  vp5 = _mm256_fmadd_ps(vp5, vt5, vt5);
    149  const __m256 ve5 = _mm256_fmadd_ps(vp5, valpha, vs5);
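All four velu entries trace lane 5 of the same computation: a degree-6 polynomial approximation of exp(t) - 1, evaluated by Horner's rule with one multiply-add per coefficient and folded into the ELU output alpha * (exp(x) - 1). A minimal scalar sketch of that evaluation order follows; the coefficient values are Taylor terms standing in for XNNPACK's exact minimax constants, and the vt/vs adjustment between the "vp *= vt" and "vp = vp * vt + vt" steps does not appear in the listing above, so it is reconstructed here as an assumption.

    /* Scalar sketch of one ELU lane (x <= 0). Inputs: vt = reduced
       argument, vs = 2**n from range reduction, valpha = ELU alpha.
       Coefficients are Taylor stand-ins, not the kernels' constants. */
    static float elu_p6_lane(float vt, float vs, float valpha) {
      const float vc6 = 1.0f / 720.0f;
      const float vc5 = 1.0f / 120.0f;
      const float vc4 = 1.0f / 24.0f;
      const float vc3 = 1.0f / 6.0f;
      const float vc2 = 0.5f;

      float vp = vc6 * vt + vc5;  /* Horner start, as on lines 99/123 */
      vp = vp * vt + vc4;
      vp = vp * vt + vc3;
      vp = vp * vt + vc2;
      vp *= vt;                   /* vp ~= (exp(vt) - 1 - vt) / vt */

      vt *= vs;                   /* reconstructed steps (assumption):  */
      vs -= 1.0f;                 /* fold 2**n into vt, keep vs - 1     */
      vp = vp * vt + vt;          /* vp ~= vs * (exp(t) - 1), t = old vt */

      return (vp + vs) * valpha;  /* alpha * (exp(x) - 1) */
    }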
/external/XNNPACK/src/f32-vscaleextexp/gen/

D | avx512f-p5-scalef-x96.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96():
     82  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
     89  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
     96  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    103  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    110  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    123  __m512 vf5 = _mm512_mul_ps(vp5, vscalev);

D | avx512f-p5-scalef-x112.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112():
     86  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
     94  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    102  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    110  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    118  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    132  __m512 vf5 = _mm512_mul_ps(vp5, vscalev);

D | avx512f-p5-scalef-x128.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128():
     90  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
     99  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    108  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    117  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    126  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    141  __m512 vf5 = _mm512_mul_ps(vp5, vscalev);

D | avx2-p5-x48.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48():
     88  __m256 vp5 = _mm256_fmadd_ps(vc5, vt5, vc4);  (local)
     95  vp5 = _mm256_fmadd_ps(vp5, vt5, vc3);
    102  vp5 = _mm256_fmadd_ps(vp5, vt5, vc2);
    109  vp5 = _mm256_fmadd_ps(vp5, vt5, vc1);
    116  vp5 = _mm256_fmadd_ps(vp5, vt5, vc0);
    129  __m256 vf5 = _mm256_mul_ps(vp5, vscalev);

D | avx512f-p5-scalef-x144.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144():
     94  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    104  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    114  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    124  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    134  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    150  __m512 vf5 = _mm512_mul_ps(vp5, vscalev);
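Each vscaleextexp entry evaluates the same degree-5 polynomial p(t) ~= exp(t) by Horner's rule, five fused multiply-adds ending at the constant term vc0, then multiplies by an externally supplied scale vscalev. A scalar sketch of that chain, with Taylor coefficients as stand-ins for the kernels' minimax constants:

    /* Degree-5 Horner chain matching the fmadd sequence above.
       vscalev is the caller-provided scale; coefficients are Taylor
       stand-ins, not XNNPACK's exact constants. */
    static float p5_scaled(float vt, float vscalev) {
      const float vc5 = 1.0f / 120.0f, vc4 = 1.0f / 24.0f;
      const float vc3 = 1.0f / 6.0f, vc2 = 0.5f;
      const float vc1 = 1.0f, vc0 = 1.0f;
      float vp = vc5 * vt + vc4;
      vp = vp * vt + vc3;
      vp = vp * vt + vc2;
      vp = vp * vt + vc1;
      vp = vp * vt + vc0;    /* vp ~= exp(vt) on the reduced range */
      return vp * vscalev;   /* final multiply, as on line 123 etc. */
    }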
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

D | avx512f-p5-scalef-x96.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96():
     89  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
     96  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    103  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    110  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    117  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    127  __m512 vf5 = _mm512_scalef_ps(vp5, vn5);

D | avx512f-p5-scalef-x112.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112():
     94  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    102  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    110  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    118  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    126  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    137  __m512 vf5 = _mm512_scalef_ps(vp5, vn5);

D | avx512f-p5-scalef-x128.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128():
     99  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    108  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    117  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    126  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    135  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    147  __m512 vf5 = _mm512_scalef_ps(vp5, vn5);

D | avx512f-p5-scalef-x144.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144():
    104  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    114  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    124  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    134  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    144  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    157  __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
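The vscaleexpminusmax kernels end the same polynomial with _mm512_scalef_ps, which computes vp * 2**vn in one instruction and so reapplies the exponent from range reduction without building 2**n by hand. An AVX-512 sketch of one vector's worth of that reconstruction; vt and vn are assumed to come from the kernel's range reduction, and the coefficients are again Taylor stand-ins:

    #include <immintrin.h>

    /* exp(x - max) ~= p(vt) * 2**vn, with the exponent applied by
       one vscalefps. Coefficient values are illustrative. */
    static __m512 exp_p5_scalef(__m512 vt, __m512 vn) {
      const __m512 vc5 = _mm512_set1_ps(1.0f / 120.0f);
      const __m512 vc4 = _mm512_set1_ps(1.0f / 24.0f);
      const __m512 vc3 = _mm512_set1_ps(1.0f / 6.0f);
      const __m512 vc2 = _mm512_set1_ps(0.5f);
      const __m512 vc1 = _mm512_set1_ps(1.0f);
      const __m512 vc0 = _mm512_set1_ps(1.0f);
      __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
      vp = _mm512_fmadd_ps(vp, vt, vc3);
      vp = _mm512_fmadd_ps(vp, vt, vc2);
      vp = _mm512_fmadd_ps(vp, vt, vc1);
      vp = _mm512_fmadd_ps(vp, vt, vc0);  /* vp ~= exp(vt) */
      return _mm512_scalef_ps(vp, vn);    /* vp * 2**vn */
    }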
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | avx512f-p5-scalef-x128-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2():
    100  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    109  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    118  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    127  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    136  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    148  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);

D | avx512f-p5-scalef-x128.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128():
     99  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    108  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    117  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    126  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    135  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    147  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);

D | avx512f-p5-scalef-x144.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144():
    104  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    114  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    124  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    134  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    144  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    157  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
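raddstoreexpminusmax is the softmax building block: it both stores each exp(x - max) and accumulates their sum ("radd" + "store"). A scalar sketch of that dual role, with illustrative names rather than the kernels' actual signatures:

    #include <stddef.h>

    /* Store each exponential and fold it into a running sum; the sum
       becomes the softmax divisor. Illustrative scalar shape only. */
    static float raddstore_exp(const float* vf, float* out, size_t n) {
      float vacc = 0.0f;
      for (size_t i = 0; i < n; i++) {
        out[i] = vf[i];  /* store exp(x[i] - max) */
        vacc += vf[i];   /* accumulate the sum    */
      }
      return vacc;
    }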
/external/XNNPACK/src/f32-raddexpminusmax/gen/

D | avx512f-p5-scalef-x128.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128():
     98  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    107  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    116  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    125  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    134  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    146  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);

D | avx512f-p5-scalef-x128-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2():
     99  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    108  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    117  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    126  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    135  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    147  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);

D | avx512f-p5-scalef-x128-acc4.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4():
    101  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    110  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    119  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    128  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    137  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    149  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);

D | avx512f-p5-scalef-x144.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144():
    103  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    113  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    123  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    133  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    143  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    156  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);

D | avx512f-p5-scalef-x144-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3():
    105  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    115  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    125  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    135  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    145  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    158  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
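The -acc2, -acc3, and -acc4 suffixes give the number of independent partial sums; splitting the reduction this way shortens the serial dependency chain of a single accumulator. A scalar sketch of the acc2 idea (assuming an even element count for brevity; the real kernels split across __m512 accumulators, not array parity):

    #include <stddef.h>

    /* Two independent partial sums, combined once at the end. */
    static float radd_acc2(const float* vf, size_t n) {  /* n even */
      float vacc0 = 0.0f, vacc1 = 0.0f;
      for (size_t i = 0; i < n; i += 2) {
        vacc0 += vf[i];      /* accumulator 0 */
        vacc1 += vf[i + 1];  /* accumulator 1, independent of 0 */
      }
      return vacc0 + vacc1;
    }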
/external/XNNPACK/src/f32-sigmoid/gen/

D | avx512f-rr1-p5-scalef-div-x96.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96():
     79  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
     86  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
     93  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    100  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    107  vp5 = _mm512_fmadd_ps(vp5, vt5, vone);
    114  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);

D | avx512f-rr1-p5-scalef-div-x112.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112():
     84  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
     92  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    100  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    108  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    116  vp5 = _mm512_fmadd_ps(vp5, vt5, vone);
    124  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);

D | avx512f-rr1-p5-scalef-nr1fma-x96.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96():
     79  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
     86  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
     93  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    100  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    107  vp5 = _mm512_fmadd_ps(vp5, vt5, vone);
    114  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
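In the sigmoid kernels the polynomial's constant term is literally vone, the exponent is reapplied with _mm512_scalef_ps, and the -div variants finish with a true division (the -nr1fma variant instead refines a reciprocal estimate with one Newton-Raphson FMA step). A scalar sketch of the -div shape, using ldexpf as the scalar analogue of scalef and Taylor coefficients as stand-ins:

    #include <math.h>

    /* Sigmoid via e = exp(z) on the non-positive branch z, so that
       e <= 1 and e / (e + 1) is numerically stable. vt and vn are
       assumed outputs of range reduction; coefficients illustrative. */
    static float sigmoid_p5_div(float vt, float vn) {
      float vp = (1.0f / 120.0f) * vt + (1.0f / 24.0f);
      vp = vp * vt + (1.0f / 6.0f);
      vp = vp * vt + 0.5f;
      vp = vp * vt + 1.0f;                    /* vc1 */
      vp = vp * vt + 1.0f;                    /* vone: constant term */
      const float ve = ldexpf(vp, (int) vn);  /* scalef analogue */
      return ve / (ve + 1.0f);                /* the -div reconstruction */
    }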
/external/XNNPACK/src/f32-raddextexp/gen/

D | avx512f-p5-scalef-x128-acc2.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2():
     92  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  (local)
    101  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
    110  vp5 = _mm512_fmadd_ps(vp5, vt5, vc2);
    119  vp5 = _mm512_fmadd_ps(vp5, vt5, vc1);
    128  vp5 = _mm512_fmadd_ps(vp5, vt5, vc0);
    170  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp5, vdelta_e5));
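raddextexp accumulates exponentials in an extended-exponent format: the running sum is kept as a (value, exponent) pair, and each term is rescaled by the gap between its exponent and the running maximum (the vdelta_e5 seen on line 170) before being added, so the sum cannot overflow float range. A scalar sketch of one accumulation step under that reading, with illustrative names:

    #include <math.h>

    /* One extended-exponent accumulation step: the sum is held as
       vacc_v * 2**vacc_e. vp and vn are the polynomial value and the
       exponent of the incoming term. Names are illustrative. */
    static void radd_ext_step(float vp, float vn,
                              float* vacc_v, float* vacc_e) {
      const float vmax_e = fmaxf(*vacc_e, vn);    /* common exponent */
      const float vdelta_e = vn - vmax_e;         /* <= 0            */
      const float vdelta_acc = *vacc_e - vmax_e;  /* <= 0            */
      *vacc_v = ldexpf(*vacc_v, (int) vdelta_acc)
              + ldexpf(vp, (int) vdelta_e);       /* rescale and add */
      *vacc_e = vmax_e;
    }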