/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx2-p5-x72-acc3.c | all hits in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3():
      113  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      123  vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      134  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
      144  vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
      154  vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
      164  vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
      178  vt8 = _mm256_mul_ps(vt8, vs8);
      188  __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
|
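Read top to bottom, the hits in each AVX2 p5 file in this listing trace the same step sequence: a two-constant Cody-Waite reduction t = x - n*ln2 (hi/lo split), a degree-5 polynomial in t evaluated in Horner form, and the reconstruction (t*s)*p + s = s*(1 + t*p) with s = 2**n. A minimal sketch of that sequence for one __m256 group follows; the wrapper function, its name, and passing the constants as parameters are illustrative only (the generated kernels broadcast their own coefficient values), so this is a sketch of the pattern, not the kernels' actual code.

    // Compile with -mavx2 -mfma. Sketch only: the function name is hypothetical
    // and the constants are taken as parameters instead of literal coefficients.
    #include <immintrin.h>

    static inline __m256 reconstruct_exp_avx2_p5(
        __m256 vx, __m256 vn, __m256 vs,                     // vs holds 2**vn
        __m256 vminus_ln2_hi, __m256 vminus_ln2_lo,
        __m256 vc5, __m256 vc4, __m256 vc3, __m256 vc2, __m256 vc1)
    {
      // Cody-Waite reduction: t = x - n*ln2, applied in hi and lo halves.
      __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
      vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);

      // Horner evaluation: p = (((c5*t + c4)*t + c3)*t + c2)*t + c1.
      __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);
      vp = _mm256_fmadd_ps(vp, vt, vc3);
      vp = _mm256_fmadd_ps(vp, vt, vc2);
      vp = _mm256_fmadd_ps(vp, vt, vc1);

      // Reconstruction: exp ~ s*(1 + t*p) = (t*s)*p + s.
      vt = _mm256_mul_ps(vt, vs);
      return _mm256_fmadd_ps(vt, vp, vs);
    }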
D | avx2-p5-x72.c | all hits in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72():
      111  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      121  vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      132  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
      142  vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
      152  vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
      162  vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
      176  vt8 = _mm256_mul_ps(vt8, vs8);
      186  __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
|
D | avx512f-p5-scalef-x144.c | all hits in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144():
       85  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
       95  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      106  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      116  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      126  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      136  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      146  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
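The AVX512F scalef kernels in this listing follow the same reduction and Horner chain but carry the polynomial one term further, down to vc0, so vp8 approximates exp(t) directly; the scaling by 2**n happens afterwards on lines that do not mention vt8 and therefore do not appear in these hits. The sketch below mirrors that shape; the final _mm512_scalef_ps call is an assumption inferred from the "scalef" kernel names, and the wrapper and parameterization are again illustrative only.

    // Compile with -mavx512f. Sketch only: the _mm512_scalef_ps step is an
    // assumption (it is not among the vt8 hits above).
    #include <immintrin.h>

    static inline __m512 exp_avx512_p5_scalef_sketch(
        __m512 vx, __m512 vn,
        __m512 vminus_ln2_hi, __m512 vminus_ln2_lo,
        __m512 vc5, __m512 vc4, __m512 vc3,
        __m512 vc2, __m512 vc1, __m512 vc0)
    {
      // Same hi/lo Cody-Waite reduction as the AVX2 kernels.
      __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
      vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);

      // Degree-5 Horner chain taken all the way to c0, so vp ~ exp(t).
      __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
      vp = _mm512_fmadd_ps(vp, vt, vc3);
      vp = _mm512_fmadd_ps(vp, vt, vc2);
      vp = _mm512_fmadd_ps(vp, vt, vc1);
      vp = _mm512_fmadd_ps(vp, vt, vc0);

      // Assumed final step: scale by 2**n (VSCALEFPS).
      return _mm512_scalef_ps(vp, vn);
    }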
D | avx512f-p5-scalef-x144-acc3.c | all hits in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3():
       87  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
       97  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      108  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      118  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      128  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      138  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      148  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
D | avx2-p5-x80-acc2.c | all hits in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2():
      117  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      128  vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      140  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
      151  vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
      162  vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
      173  vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
      188  vt8 = _mm256_mul_ps(vt8, vs8);
      199  __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
|
D | avx2-p5-x80.c | all hits in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80():
      116  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      127  vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      139  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
      150  vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
      161  vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
      172  vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
      187  vt8 = _mm256_mul_ps(vt8, vs8);
      198  __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
|
D | avx512f-p5-scalef-x160-acc5.c | all hits in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5():
       92  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      103  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      115  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      126  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      137  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      148  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      159  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
D | avx512f-p5-scalef-x160.c | all hits in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160():
       88  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
       99  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      111  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      122  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      133  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      144  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      155  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
D | avx512f-p5-scalef-x160-acc2.c | all hits in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2():
       89  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      100  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      112  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      123  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      134  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      145  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      156  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x72.c | all hits in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72():
      113  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      123  vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      134  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
      144  vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
      154  vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
      164  vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
      178  vt8 = _mm256_mul_ps(vt8, vs8);
      188  __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
|
D | avx512f-p5-scalef-x144.c | all hits in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144():
       86  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
       96  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      107  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      117  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      127  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      137  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      147  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
D | avx2-p5-x80.c | all hits in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80():
      118  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      129  vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      141  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
      152  vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
      163  vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
      174  vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
      189  vt8 = _mm256_mul_ps(vt8, vs8);
      200  __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
|
D | avx512f-p5-scalef-x160.c | all hits in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160():
       89  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      100  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      112  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      123  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      134  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      145  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      156  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx512f-p5-scalef-x144.c | all hits in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144():
       86  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
       96  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      107  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      117  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      127  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      137  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      147  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
D | avx512f-p5-scalef-x144-acc3.c | all hits in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3():
       88  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
       98  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      109  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      119  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      129  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      139  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      149  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
D | avx2-p5-x72-acc3.c | all hits in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72_acc3():
      114  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      124  vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      135  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
      145  vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
      155  vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
      165  vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
      179  vt8 = _mm256_mul_ps(vt8, vs8);
      189  __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
|
D | avx2-p5-x72.c | all hits in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72():
      112  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      122  vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      133  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
      143  vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
      153  vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
      163  vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
      177  vt8 = _mm256_mul_ps(vt8, vs8);
      187  __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
|
D | avx512f-p5-scalef-x160-acc2.c | all hits in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2():
       90  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      101  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      113  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      124  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      135  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      146  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      157  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
D | avx512f-p5-scalef-x160.c | all hits in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160():
       89  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      100  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      112  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      123  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      134  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      145  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      156  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
D | avx2-p5-x80.c | all hits in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80():
      117  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      128  vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      140  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
      151  vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
      162  vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
      173  vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
      188  vt8 = _mm256_mul_ps(vt8, vs8);
      199  __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
|
D | avx2-p5-x80-acc5.c | all hits in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5():
      121  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
      132  vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      144  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
      155  vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
      166  vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
      177  vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
      192  vt8 = _mm256_mul_ps(vt8, vs8);
      203  __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x144.c | all hits in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144():
       76  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
       86  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
       97  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      107  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      117  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      127  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      137  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
D | avx512f-p5-scalef-x160.c | all hits in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160():
       78  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
       89  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
      101  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      112  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      123  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      134  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      145  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x144.c | all hits in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144():
       76  __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);   [local]
       86  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
       97  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
      107  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
      117  vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
      127  vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
      137  vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx2-rr1-p6-x72.c | all hits in xnn_f32_velu_ukernel__avx2_rr1_p6_x72():
      101  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vz8);   [local]
      111  __m256 vp8 = _mm256_fmadd_ps(vc6, vt8, vc5);
      121  vp8 = _mm256_fmadd_ps(vp8, vt8, vc4);
      131  vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
      141  vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
      159  vp8 = _mm256_mul_ps(vp8, vt8);
      160  vt8 = _mm256_mul_ps(vt8, vs8);
      179  vp8 = _mm256_fmadd_ps(vp8, vt8, vt8);
|
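The ELU kernel above has a slightly different shape: an rr1 reduction with a single vminus_ln2 constant (no hi/lo split), a degree-6 Horner chain that stops at vc2, and a final fmadd that yields s*(t + c2*t^2 + ... + c6*t^6), i.e. the polynomial part of expm1(z); the remaining (s - 1) contribution and the alpha scaling are handled on lines that do not touch vt8 and so are absent from these hits. Below is a sketch of just the part visible here, with the same caveats as the earlier sketches (hypothetical wrapper name, constants passed as parameters).

    // Compile with -mavx2 -mfma. Sketch of the vt8/vp8 portion of the ELU
    // kernel only; the (s - 1) term and alpha scaling happen elsewhere.
    #include <immintrin.h>

    static inline __m256 velu_expm1_poly_part_sketch(
        __m256 vz, __m256 vn, __m256 vs, __m256 vminus_ln2,
        __m256 vc6, __m256 vc5, __m256 vc4, __m256 vc3, __m256 vc2)
    {
      // rr1 reduction: t = z - n*ln2 with a single constant.
      __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vz);

      // Horner chain over c6..c2 only.
      __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
      vp = _mm256_fmadd_ps(vp, vt, vc4);
      vp = _mm256_fmadd_ps(vp, vt, vc3);
      vp = _mm256_fmadd_ps(vp, vt, vc2);

      vp = _mm256_mul_ps(vp, vt);              // p := p*t
      vt = _mm256_mul_ps(vt, vs);              // t := t*s
      return _mm256_fmadd_ps(vp, vt, vt);      // (p*t)*(t*s) + t*s = s*t*(1 + p*t)
    }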