/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x56.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56():
    101  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
    109  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    118  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
    126  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    134  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    142  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
    154  vt6 = _mm256_mul_ps(vt6, vs6);
    162  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);
|
D | avx512f-p5-scalef-x112.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112():
     78  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
     86  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     95  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    103  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    111  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    119  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
|
D | avx2-p5-x64.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64():
    106  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
    115  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    125  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
    134  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    143  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    152  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
    165  vt6 = _mm256_mul_ps(vt6, vs6);
    174  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);
|
D | avx512f-p5-scalef-x128.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128():
     81  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
     90  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    100  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    136  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
|
D | avx2-p5-x72.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72():
    111  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
    121  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    132  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
    142  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    152  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    162  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
    176  vt6 = _mm256_mul_ps(vt6, vs6);
    186  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);
|
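All of the vscaleexpminusmax entries above follow one pattern: the input (already shifted by the row maximum) is reduced as t = x - n*ln2 using a two-term hi/lo split of ln2, a degree-5 polynomial in t is evaluated with Horner FMAs (the vp6 chain), and the result is rescaled by 2**n. A minimal scalar sketch of that computation follows; the polynomial uses plain Taylor coefficients and illustrative hi/lo constants, not the kernels' tuned minimax values, and the placement of the scale factor is an assumption.

#include <math.h>

/* Scalar sketch of the p5 exp approximation the entries above vectorize.
 * Coefficients and the ln2 hi/lo split are illustrative, not the exact
 * constants used by the generated kernels. */
static float scaleexpminusmax_sketch(float x, float max, float scale) {
  const float z = x - max;                    /* vx: input minus row max */
  const float n = rintf(z * 0x1.715476p+0f);  /* vn = round(z / ln2) */
  const float s = ldexpf(1.0f, (int) n);      /* vs = 2**n */

  /* vt = z - n*ln2, subtracted in two steps (hi then lo) so the
   * reduction stays accurate. */
  float t = fmaf(n, -0x1.62E43p-1f, z);
  t = fmaf(n, 0x1.05C61p-29f, t);

  /* Degree-5 Horner chain, mirroring the vp6 = fma(vp6, vt6, vcK) lines. */
  float p = fmaf(1.0f / 120.0f, t, 1.0f / 24.0f);
  p = fmaf(p, t, 1.0f / 6.0f);
  p = fmaf(p, t, 0.5f);
  p = fmaf(p, t, 1.0f);

  /* AVX2-style reconstruction: vt = vt*vs; vf = vt*vp + vs.
   * The scale factor is applied at the end here; where the real kernel
   * applies it is not shown in this listing. */
  t *= s;
  return scale * fmaf(t, p, s);
}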
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x112.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112():
     70  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
     78  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     87  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
     95  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    103  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    111  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    119  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
|
D | avx512f-p5-scalef-x128.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128():
     72  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
     81  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     91  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    100  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
|
D | avx512f-p5-scalef-x144.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144():
     74  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
     84  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     95  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    105  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    115  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    125  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    135  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
|
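The vscaleextexp entries, like the other avx512f-p5-scalef kernels in this listing, run the Horner chain one step further, down to vc0, because the result is reconstructed by scaling the full polynomial with a power of two (VSCALEFPS) rather than the vf = vt*vp + vs form used by the AVX2 kernels. A hedged scalar sketch of that tail only, with ldexpf standing in for _mm512_scalef_ps and the inputs assumed to come from a reduction like the previous sketch:

#include <math.h>

/* Sketch of the scalef-style tail: p holds the degree-5 polynomial already
 * evaluated through vc1, t is the reduced argument, and n is the rounded
 * exponent.  ldexpf() is the scalar analogue of _mm512_scalef_ps(p, n). */
static float scalef_tail_sketch(float p, float t, float n) {
  p = fmaf(p, t, 1.0f);        /* the extra "+ vc0" Horner step */
  return ldexpf(p, (int) n);   /* p * 2**n */
}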
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx2-p5-x64-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2():
    105  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
    114  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    124  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
    133  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    142  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    151  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
    164  vt6 = _mm256_mul_ps(vt6, vs6);
    173  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);
|
D | avx2-p5-x64-acc4.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4():
    107  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
    116  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    126  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
    135  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    144  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    153  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
    166  vt6 = _mm256_mul_ps(vt6, vs6);
    175  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);
|
D | avx2-p5-x64.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64():
    104  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
    113  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    123  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
    132  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    141  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    150  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
    163  vt6 = _mm256_mul_ps(vt6, vs6);
    172  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);
|
D | avx512f-p5-scalef-x128.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128():
     80  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
     89  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     99  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    108  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    117  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    126  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    135  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
|
D | avx512f-p5-scalef-x128-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2():
     81  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
     90  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    100  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    136  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
|
D | avx512f-p5-scalef-x128-acc4.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4():
     83  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
     92  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    102  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    111  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    120  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    129  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    138  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
|
D | avx2-p5-x72-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3():
    111  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
    121  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    132  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
    142  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    152  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    162  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
    176  vt6 = _mm256_mul_ps(vt6, vs6);
    186  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);
|
D | avx2-p5-x72.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72():
    109  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
    119  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    130  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
    140  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    150  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    160  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
    174  vt6 = _mm256_mul_ps(vt6, vs6);
    184  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);
|
D | avx512f-p5-scalef-x144.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144():
     83  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
     93  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    104  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    114  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    124  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    134  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    144  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
|
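The raddexpminusmax kernels evaluate the same p5 exp(x - max) per lane but fold it into a running sum (the softmax denominator); the _acc2/_acc3/_acc4 suffixes indicate how many independent accumulators are kept to shorten the add dependency chain. A scalar sketch of that reduction shape, with expf() standing in for the inline approximation:

#include <math.h>
#include <stddef.h>

/* Sketch of the reduce-add pattern: sum of exp(x[i] - max) kept in two
 * independent partial sums, mirroring the _acc2 variants.  expf() stands
 * in for the kernels' vectorized p5 approximation. */
static float raddexpminusmax_sketch(const float* x, size_t n, float max) {
  float acc0 = 0.0f;
  float acc1 = 0.0f;
  size_t i = 0;
  for (; i + 2 <= n; i += 2) {
    acc0 += expf(x[i + 0] - max);
    acc1 += expf(x[i + 1] - max);
  }
  if (i < n) {
    acc0 += expf(x[i] - max);   /* remainder element */
  }
  return acc0 + acc1;           /* accumulators folded at the end */
}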
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx2-p5-x64.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64():
    105  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
    114  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    124  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
    133  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    142  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    151  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
    164  vt6 = _mm256_mul_ps(vt6, vs6);
    173  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);
|
D | avx2-p5-x64-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2():
    106  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
    115  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    125  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
    134  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    143  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    152  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
    165  vt6 = _mm256_mul_ps(vt6, vs6);
    174  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);
|
D | avx512f-p5-scalef-x128-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2():
     82  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
     91  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    101  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    110  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    119  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    128  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    137  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
|
D | avx512f-p5-scalef-x128.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128():
     81  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
     90  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    100  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    136  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
|
D | avx2-p5-x64-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc4():
    108  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
    117  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    127  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
    136  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    145  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    154  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
    167  vt6 = _mm256_mul_ps(vt6, vs6);
    176  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);
|
D | avx512f-p5-scalef-x144.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144():
     84  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);  [local]
     94  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    105  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    115  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    125  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    135  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    145  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
|
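raddstoreexpminusmax is the same reduction, except each exp(x[i] - max) is also written back out, so one pass over the row produces both the softmax numerators and their sum. A scalar sketch, again with expf() in place of the p5 code:

#include <math.h>
#include <stddef.h>

/* Sketch of the radd+store pattern: write each exponential out and keep a
 * running sum of them. */
static float raddstoreexpminusmax_sketch(const float* x, float* y,
                                         size_t n, float max) {
  float acc = 0.0f;
  for (size_t i = 0; i < n; i++) {
    const float e = expf(x[i] - max);
    y[i] = e;     /* stored numerator */
    acc += e;     /* running denominator */
  }
  return acc;
}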
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx512f-rr1-p6-x112.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112():
     89  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);  [local]
     97  __m512 vp6 = _mm512_fmadd_ps(vc6, vt6, vc5);
    105  vp6 = _mm512_fmadd_ps(vp6, vt6, vc4);
    113  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    121  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    135  vp6 = _mm512_mul_ps(vp6, vt6);
    136  vt6 = _mm512_mul_ps(vt6, vs6);
    152  vp6 = _mm512_fmadd_ps(vp6, vt6, vt6);
|
D | velu-avx2-rr1-p6-x56.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x56():
     89  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6);  [local]
     97  __m256 vp6 = _mm256_fmadd_ps(vc6, vt6, vc5);
    105  vp6 = _mm256_fmadd_ps(vp6, vt6, vc4);
    113  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    121  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    135  vp6 = _mm256_mul_ps(vp6, vt6);
    136  vt6 = _mm256_mul_ps(vt6, vs6);
    151  vp6 = _mm256_fmadd_ps(vp6, vt6, vt6);
|
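The velu entries use a different recipe from the exp kernels above: a single-constant ln2 reduction (rr1 instead of the hi/lo pair), a degree-6 polynomial running from vc6 down to vc2, and an expm1-style tail (vp6 = vp6*vt6; vt6 = vt6*vs6; vp6 = fma(vp6, vt6, vt6)) that feeds the negative branch of ELU. A scalar sketch of the surrounding activation, with expm1f() standing in for that approximation; the kernels' additional scaling parameters are deliberately left out here:

#include <math.h>

/* Sketch of the ELU the velu kernels implement: negative inputs go through
 * alpha * expm1(x), non-negative inputs pass through.  expm1f() stands in
 * for the rr1-p6 approximation whose vt6/vp6 chain is listed above; any
 * extra input/output scaling the real kernels apply is omitted. */
static float elu_sketch(float x, float alpha) {
  return x >= 0.0f ? x : alpha * expm1f(x);
}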