Symbol-search hits for vt7 across XNNPACK's generated exp-family microkernels
(the [local] tag marks the line that declares vt7 in each function):

/external/XNNPACK/src/f32-raddexpminusmax/gen/

avx2-p5-x64-acc2.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2():
  106  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
  115  vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  125  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);
  134  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  143  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  152  vp7 = _mm256_fmadd_ps(vp7, vt7, vc1);
  165  vt7 = _mm256_mul_ps(vt7, vs7);
  174  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);

avx2-p5-x64.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64():
  105  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
  114  vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  124  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);
  133  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  142  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  151  vp7 = _mm256_fmadd_ps(vp7, vt7, vc1);
  164  vt7 = _mm256_mul_ps(vt7, vs7);
  173  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);

avx2-p5-x64-acc4.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4():
  108  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
  117  vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  127  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);
  136  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  145  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  154  vp7 = _mm256_fmadd_ps(vp7, vt7, vc1);
  167  vt7 = _mm256_mul_ps(vt7, vs7);
  176  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);

avx512f-p5-scalef-x128-acc2.c, in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2():
   82  __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
   91  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  101  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
  110  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  119  vp7 = _mm512_fmadd_ps(vp7, vt7, vc2);
  128  vp7 = _mm512_fmadd_ps(vp7, vt7, vc1);
  137  vp7 = _mm512_fmadd_ps(vp7, vt7, vc0);

avx512f-p5-scalef-x128-acc4.c, in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4():
   84  __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
   93  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  103  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
  112  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  121  vp7 = _mm512_fmadd_ps(vp7, vt7, vc2);
  130  vp7 = _mm512_fmadd_ps(vp7, vt7, vc1);
  139  vp7 = _mm512_fmadd_ps(vp7, vt7, vc0);

avx512f-p5-scalef-x128.c, in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128():
   81  __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
   90  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  100  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
  109  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  118  vp7 = _mm512_fmadd_ps(vp7, vt7, vc2);
  127  vp7 = _mm512_fmadd_ps(vp7, vt7, vc1);
  136  vp7 = _mm512_fmadd_ps(vp7, vt7, vc0);

avx2-p5-x72-acc3.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3():
  112  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
  122  vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  133  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);
  143  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  153  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  163  vp7 = _mm256_fmadd_ps(vp7, vt7, vc1);
  177  vt7 = _mm256_mul_ps(vt7, vs7);
  187  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);

avx2-p5-x72.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72():
  110  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
  120  vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  131  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);
  141  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  151  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  161  vp7 = _mm256_fmadd_ps(vp7, vt7, vc1);
  175  vt7 = _mm256_mul_ps(vt7, vs7);
  185  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);

avx512f-p5-scalef-x144-acc3.c, in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3():
   86  __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
   96  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  107  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
  117  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  127  vp7 = _mm512_fmadd_ps(vp7, vt7, vc2);
  137  vp7 = _mm512_fmadd_ps(vp7, vt7, vc1);
  147  vp7 = _mm512_fmadd_ps(vp7, vt7, vc0);

avx512f-p5-scalef-x144.c, in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144():
   84  __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
   94  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  105  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
  115  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  125  vp7 = _mm512_fmadd_ps(vp7, vt7, vc2);
  135  vp7 = _mm512_fmadd_ps(vp7, vt7, vc1);
  145  vp7 = _mm512_fmadd_ps(vp7, vt7, vc0);

avx2-p5-x80-acc2.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2():
  116  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
  127  vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  139  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);
  150  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  161  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  172  vp7 = _mm256_fmadd_ps(vp7, vt7, vc1);
  187  vt7 = _mm256_mul_ps(vt7, vs7);
  198  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);

avx2-p5-x80.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80():
  115  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
  126  vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  138  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);
  149  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  160  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  171  vp7 = _mm256_fmadd_ps(vp7, vt7, vc1);
  186  vt7 = _mm256_mul_ps(vt7, vs7);
  197  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);

avx2-p5-x80-acc5.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5():
  119  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
  130  vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  142  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);
  153  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  164  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  175  vp7 = _mm256_fmadd_ps(vp7, vt7, vc1);
  190  vt7 = _mm256_mul_ps(vt7, vs7);
  201  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);
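All of the avx2-p5 kernels in these listings share a single exp core, unrolled 8, 9, or 10 __m256 vectors wide (64, 72, or 80 floats per iteration); the vt7 hits are simply the eighth vector of that unroll. Below is a minimal single-vector sketch of that core under stated assumptions: the function name is invented for illustration, and the constant values are illustrative stand-ins, not copied from the generated sources.

#include <immintrin.h>

// One vector of the avx2-p5 exp core: computes exp(vx) for vx <= 0 as
// s * (1 + t*p(t)), where vx = n*ln2 + t and s = 2^n.
static __m256 exp_avx2_p5_sketch(__m256 vx) {
  const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);
  const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f);
  // ln2 split into hi/lo parts for a two-step Cody-Waite reduction.
  const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f);
  const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f);
  // Degree-5 polynomial coefficients (illustrative values).
  const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f);
  const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f);
  const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f);
  const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f);
  const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f);

  // Magic-bias rounding: the low mantissa bits of vn now hold
  // round(vx * log2e) plus the exponent bias, so shifting them into the
  // exponent field yields s = 2^n without a convert/round instruction.
  __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);
  const __m256 vs = _mm256_castsi256_ps(
      _mm256_slli_epi32(_mm256_castps_si256(vn), 23));
  vn = _mm256_sub_ps(vn, vmagic_bias);

  // t = vx - n*ln2, done in two steps so the reduction stays accurate.
  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);

  // Horner evaluation: p(t) = c1 + t*(c2 + t*(c3 + t*(c4 + t*c5))).
  __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);
  vp = _mm256_fmadd_ps(vp, vt, vc3);
  vp = _mm256_fmadd_ps(vp, vt, vc2);
  vp = _mm256_fmadd_ps(vp, vt, vc1);

  // Reconstruction: f = s + (s*t)*p(t) = s * (1 + t*p(t)).
  vt = _mm256_mul_ps(vt, vs);
  return _mm256_fmadd_ps(vt, vp, vs);
}

The generated kernels wrap this core with a flush-to-zero fixup for inputs below the denormal cutoff, and the raddexpminusmax variants then add each vf into running sums; the _acc2/_acc3/_acc4/_acc5 suffixes say how many partial accumulators that sum is split across to shorten the FMA dependency chain.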
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

avx2-p5-x64.c, in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64():
  107  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
  116  vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  126  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);
  135  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  144  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  153  vp7 = _mm256_fmadd_ps(vp7, vt7, vc1);
  166  vt7 = _mm256_mul_ps(vt7, vs7);
  175  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);

avx512f-p5-scalef-x128.c, in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128():
   82  __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
   91  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  101  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
  110  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  119  vp7 = _mm512_fmadd_ps(vp7, vt7, vc2);
  128  vp7 = _mm512_fmadd_ps(vp7, vt7, vc1);
  137  vp7 = _mm512_fmadd_ps(vp7, vt7, vc0);

avx2-p5-x72.c, in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72():
  112  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
  122  vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  133  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);
  143  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  153  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  163  vp7 = _mm256_fmadd_ps(vp7, vt7, vc1);
  177  vt7 = _mm256_mul_ps(vt7, vs7);
  187  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);

avx512f-p5-scalef-x144.c, in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144():
   85  __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
   95  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  106  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
  116  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  126  vp7 = _mm512_fmadd_ps(vp7, vt7, vc2);
  136  vp7 = _mm512_fmadd_ps(vp7, vt7, vc1);
  146  vp7 = _mm512_fmadd_ps(vp7, vt7, vc0);

avx2-p5-x80.c, in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80():
  117  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
  128  vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_lo, vt7);
  140  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);
  151  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  162  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  173  vp7 = _mm256_fmadd_ps(vp7, vt7, vc1);
  188  vt7 = _mm256_mul_ps(vt7, vs7);
  199  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);
/external/XNNPACK/src/f32-vscaleextexp/gen/

avx512f-p5-scalef-x128.c, in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128():
   73  __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
   82  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
   92  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
  101  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  110  vp7 = _mm512_fmadd_ps(vp7, vt7, vc2);
  119  vp7 = _mm512_fmadd_ps(vp7, vt7, vc1);
  128  vp7 = _mm512_fmadd_ps(vp7, vt7, vc0);

avx512f-p5-scalef-x144.c, in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144():
   75  __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
   85  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
   96  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
  106  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  116  vp7 = _mm512_fmadd_ps(vp7, vt7, vc2);
  126  vp7 = _mm512_fmadd_ps(vp7, vt7, vc1);
  136  vp7 = _mm512_fmadd_ps(vp7, vt7, vc0);
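The avx512f-p5-scalef variants (listed in all three groups above and in f32-raddextexp below) run the Horner chain one step further, down to vc0, because they reconstruct the result with the hardware scalef operation instead of assembling 2^n in the exponent bits. A single-vector sketch under the same caveats as before (invented function name, illustrative constant values):

#include <immintrin.h>

// One vector of the avx512f-p5-scalef exp core: exp(vx) = p(t) * 2^n,
// where vx = n*ln2 + t; the multiply by 2^n is a single scalef.
static __m512 exp_avx512_p5_scalef_sketch(__m512 vx) {
  const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f);
  const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f);
  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);
  // Degree-5 polynomial coefficients plus c0 = 1 (illustrative values).
  const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f);
  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);
  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);
  const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f);
  const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f);
  const __m512 vc0 = _mm512_set1_ps(1.0f);

  // n = round(vx * log2e); AVX-512 rounds in-register, no magic bias needed.
  const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);

  // Two-step Cody-Waite reduction: t = vx - n*ln2.
  __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);

  // Horner chain down to c0, so vp approximates exp(t) on the reduced range.
  __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
  vp = _mm512_fmadd_ps(vp, vt, vc3);
  vp = _mm512_fmadd_ps(vp, vt, vc2);
  vp = _mm512_fmadd_ps(vp, vt, vc1);
  vp = _mm512_fmadd_ps(vp, vt, vc0);

  // scalef computes vp * 2^vn with proper overflow/underflow semantics,
  // so this path needs neither the vs multiply nor a denormal fixup.
  return _mm512_scalef_ps(vp, vn);
}

The extexp kernels (f32-vscaleextexp above, f32-raddextexp below) build on the same core but keep n as a separate "extended exponent" alongside the mantissa accumulator and apply the scalef only at the very end, so accumulated sums are not confined to the finite range of a single float.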
/external/XNNPACK/src/f32-raddextexp/gen/

avx512f-p5-scalef-x128.c, in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128():
   73  __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
   82  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
   92  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
  101  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  110  vp7 = _mm512_fmadd_ps(vp7, vt7, vc2);
  119  vp7 = _mm512_fmadd_ps(vp7, vt7, vc1);
  128  vp7 = _mm512_fmadd_ps(vp7, vt7, vc0);

avx512f-p5-scalef-x128-acc2.c, in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2():
   75  __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_hi, vx7);  [local]
   84  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
   94  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
  103  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  112  vp7 = _mm512_fmadd_ps(vp7, vt7, vc2);
  121  vp7 = _mm512_fmadd_ps(vp7, vt7, vc1);
  130  vp7 = _mm512_fmadd_ps(vp7, vt7, vc0);
/external/XNNPACK/src/f32-velu/gen/

velu-avx512f-rr1-p6-x128.c, in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128():
   94  __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vz7);  [local]
  103  __m512 vp7 = _mm512_fmadd_ps(vc6, vt7, vc5);
  112  vp7 = _mm512_fmadd_ps(vp7, vt7, vc4);
  121  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  130  vp7 = _mm512_fmadd_ps(vp7, vt7, vc2);
  146  vp7 = _mm512_mul_ps(vp7, vt7);
  147  vt7 = _mm512_mul_ps(vt7, vs7);
  165  vp7 = _mm512_fmadd_ps(vp7, vt7, vt7);

velu-avx2-rr1-p6-x64.c, in xnn_f32_velu_ukernel__avx2_rr1_p6_x64():
   92  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vz7);  [local]
  101  __m256 vp7 = _mm256_fmadd_ps(vc6, vt7, vc5);
  110  vp7 = _mm256_fmadd_ps(vp7, vt7, vc4);
  119  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  128  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  144  vp7 = _mm256_mul_ps(vp7, vt7);
  145  vt7 = _mm256_mul_ps(vt7, vs7);
  162  vp7 = _mm256_fmadd_ps(vp7, vt7, vt7);

velu-avx2-rr1-p6-x72.c, in xnn_f32_velu_ukernel__avx2_rr1_p6_x72():
   97  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vz7);  [local]
  107  __m256 vp7 = _mm256_fmadd_ps(vc6, vt7, vc5);
  117  vp7 = _mm256_fmadd_ps(vp7, vt7, vc4);
  127  vp7 = _mm256_fmadd_ps(vp7, vt7, vc3);
  137  vp7 = _mm256_fmadd_ps(vp7, vt7, vc2);
  154  vp7 = _mm256_mul_ps(vp7, vt7);
  155  vt7 = _mm256_mul_ps(vt7, vs7);
  174  vp7 = _mm256_fmadd_ps(vp7, vt7, vt7);
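The velu kernels reuse the same vt7 temporary but compute expm1 rather than exp: the reduction is single-step ("rr1", one rounded ln2 constant instead of a hi/lo pair), the polynomial is degree 6 with the low-order terms folded into the reconstruction, and alpha*(exp(z)-1) is assembled without ever forming exp(z). A single-vector sketch of the negative-input path, same caveats as before (invented function name, illustrative constants; the real kernels also saturate very negative z and blend this result with x on positive lanes):

#include <immintrin.h>

// One vector of the avx2-rr1-p6 ELU core for vz <= 0:
// alpha*(exp(z) - 1) = alpha*(s - 1) + alpha * s*(t + t^2*p(t)),
// where vz = n*ln2 + t and s = 2^n.
static __m256 elu_neg_avx2_rr1_p6_sketch(__m256 vz, __m256 valpha) {
  const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);
  const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f);
  // "rr1": a single rounded ln2, no hi/lo split.
  const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E430p-1f);
  // Degree-6 polynomial coefficients c6..c2 (illustrative values); the
  // t and constant terms are supplied by the reconstruction below.
  const __m256 vc6 = _mm256_set1_ps(0x1.6B7338p-10f);
  const __m256 vc5 = _mm256_set1_ps(0x1.12278Ep-7f);
  const __m256 vc4 = _mm256_set1_ps(0x1.555716p-5f);
  const __m256 vc3 = _mm256_set1_ps(0x1.5554B0p-3f);
  const __m256 vc2 = _mm256_set1_ps(0x1.FFFFFEp-2f);

  __m256 vn = _mm256_fmadd_ps(vz, vlog2e, vmagic_bias);
  __m256 vs = _mm256_castsi256_ps(
      _mm256_slli_epi32(_mm256_castps_si256(vn), 23));   // s = 2^n
  vn = _mm256_sub_ps(vn, vmagic_bias);
  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vz);       // one-step reduction

  __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
  vp = _mm256_fmadd_ps(vp, vt, vc4);
  vp = _mm256_fmadd_ps(vp, vt, vc3);
  vp = _mm256_fmadd_ps(vp, vt, vc2);

  vp = _mm256_mul_ps(vp, vt);                  // t * p(t)
  vt = _mm256_mul_ps(vt, vs);                  // s * t
  vs = _mm256_fmsub_ps(vs, valpha, valpha);    // alpha * (s - 1)
  vp = _mm256_fmadd_ps(vp, vt, vt);            // s * (t + t^2 * p(t))
  return _mm256_fmadd_ps(vp, valpha, vs);      // alpha * (exp(z) - 1)
}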