/external/XNNPACK/src/f32-raddexpminusmax/gen/

avx2-p5-x80-acc2.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2():
  118  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  129  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  141  __m256 vp9 = _mm256_fmadd_ps(vc5, vt9, vc4);
  152  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  163  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  174  vp9 = _mm256_fmadd_ps(vp9, vt9, vc1);
  189  vt9 = _mm256_mul_ps(vt9, vs9);
  200  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

avx2-p5-x80.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80():
  117  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  128  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  140  __m256 vp9 = _mm256_fmadd_ps(vc5, vt9, vc4);
  151  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  162  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  173  vp9 = _mm256_fmadd_ps(vp9, vt9, vc1);
  188  vt9 = _mm256_mul_ps(vt9, vs9);
  199  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

avx512f-p5-scalef-x160-acc5.c, in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5():
   93  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  104  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  116  __m512 vp9 = _mm512_fmadd_ps(vc5, vt9, vc4);
  127  vp9 = _mm512_fmadd_ps(vp9, vt9, vc3);
  138  vp9 = _mm512_fmadd_ps(vp9, vt9, vc2);
  149  vp9 = _mm512_fmadd_ps(vp9, vt9, vc1);
  160  vp9 = _mm512_fmadd_ps(vp9, vt9, vc0);

avx512f-p5-scalef-x160.c, in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160():
   89  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  100  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  112  __m512 vp9 = _mm512_fmadd_ps(vc5, vt9, vc4);
  123  vp9 = _mm512_fmadd_ps(vp9, vt9, vc3);
  134  vp9 = _mm512_fmadd_ps(vp9, vt9, vc2);
  145  vp9 = _mm512_fmadd_ps(vp9, vt9, vc1);
  156  vp9 = _mm512_fmadd_ps(vp9, vt9, vc0);

avx512f-p5-scalef-x160-acc2.c, in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2():
   90  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  101  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  113  __m512 vp9 = _mm512_fmadd_ps(vc5, vt9, vc4);
  124  vp9 = _mm512_fmadd_ps(vp9, vt9, vc3);
  135  vp9 = _mm512_fmadd_ps(vp9, vt9, vc2);
  146  vp9 = _mm512_fmadd_ps(vp9, vt9, vc1);
  157  vp9 = _mm512_fmadd_ps(vp9, vt9, vc0);

avx2-p5-x80-acc5.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5():
  121  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  132  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  144  __m256 vp9 = _mm256_fmadd_ps(vc5, vt9, vc4);
  155  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  166  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  177  vp9 = _mm256_fmadd_ps(vp9, vt9, vc1);
  192  vt9 = _mm256_mul_ps(vt9, vs9);
  203  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

avx2-p5-x96.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96():
  127  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  140  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  154  __m256 vp9 = _mm256_fmadd_ps(vc5, vt9, vc4);
  167  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  180  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  193  vp9 = _mm256_fmadd_ps(vp9, vt9, vc1);
  210  vt9 = _mm256_mul_ps(vt9, vs9);
  223  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

avx2-p5-x96-acc2.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2():
  128  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  141  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  155  __m256 vp9 = _mm256_fmadd_ps(vc5, vt9, vc4);
  168  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  181  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  194  vp9 = _mm256_fmadd_ps(vp9, vt9, vc1);
  211  vt9 = _mm256_mul_ps(vt9, vs9);
  224  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

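Every match above is one step of the same computation: exp(x - max), evaluated as 2^n * p(t). Here n is chosen so that t = (x - max) - n*ln2 is small, the n*ln2 subtraction is split across the vminus_ln2_hi / vminus_ln2_lo pair (a Cody-Waite reduction, so the first subtraction is exact in float), and p(t) is the degree-5 Horner chain of vc5..vc1 FMAs. A minimal scalar sketch of the pattern, using the classic ln2 split and placeholder (Taylor-like) coefficients rather than XNNPACK's exact minimax constants:

#include <math.h>

/* Scalar sketch of the vectorized pattern above. Coefficients are
 * illustrative placeholders, not XNNPACK's exact constants. */
static float expminusmax_sketch(float x, float max) {
  const float log2e  = 0x1.715476p+0f;   /* ~1/ln(2) */
  const float ln2_hi = 0x1.62E400p-1f;   /* high word of ln(2): n*ln2_hi is exact */
  const float ln2_lo = 0x1.7F7D1Cp-20f;  /* low-word correction */

  const float vx = x - max;
  const float vn = rintf(vx * log2e);    /* n = round(vx / ln2) */
  /* Two-step (Cody-Waite) reduction: t = vx - n*ln2. */
  float vt = fmaf(vn, -ln2_hi, vx);
  vt = fmaf(vn, -ln2_lo, vt);
  /* Degree-5 Horner chain, mirroring the vc5..vc1 FMAs in the listing. */
  float vp = fmaf(8.3333e-3f, vt, 4.1667e-2f);  /* ~1/120, ~1/24 */
  vp = fmaf(vp, vt, 1.6667e-1f);                /* ~1/6 */
  vp = fmaf(vp, vt, 5.0e-1f);                   /* ~1/2 */
  vp = fmaf(vp, vt, 1.0f);                      /* ~1 */
  /* Reconstruction: exp(vx) = s + (t*s)*p, with s = 2^n. */
  const float vs = ldexpf(1.0f, (int) vn);
  vt *= vs;
  return fmaf(vt, vp, vs);
}
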
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

avx2-p5-x80.c, in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80():
  119  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  130  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  142  __m256 vp9 = _mm256_fmadd_ps(vc5, vt9, vc4);
  153  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  164  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  175  vp9 = _mm256_fmadd_ps(vp9, vt9, vc1);
  190  vt9 = _mm256_mul_ps(vt9, vs9);
  201  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

avx512f-p5-scalef-x160.c, in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160():
   90  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  101  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  113  __m512 vp9 = _mm512_fmadd_ps(vc5, vt9, vc4);
  124  vp9 = _mm512_fmadd_ps(vp9, vt9, vc3);
  135  vp9 = _mm512_fmadd_ps(vp9, vt9, vc2);
  146  vp9 = _mm512_fmadd_ps(vp9, vt9, vc1);
  157  vp9 = _mm512_fmadd_ps(vp9, vt9, vc0);

avx2-p5-x88.c, in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88():
  124  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  136  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  149  __m256 vp9 = _mm256_fmadd_ps(vc5, vt9, vc4);
  161  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  173  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  185  vp9 = _mm256_fmadd_ps(vp9, vt9, vc1);
  201  vt9 = _mm256_mul_ps(vt9, vs9);
  213  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

avx512f-p5-scalef-x176.c, in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176():
   93  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  105  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  118  __m512 vp9 = _mm512_fmadd_ps(vc5, vt9, vc4);
  130  vp9 = _mm512_fmadd_ps(vp9, vt9, vc3);
  142  vp9 = _mm512_fmadd_ps(vp9, vt9, vc2);
  154  vp9 = _mm512_fmadd_ps(vp9, vt9, vc1);
  166  vp9 = _mm512_fmadd_ps(vp9, vt9, vc0);

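Two reconstruction styles are visible in this and the previous section. The avx2 files stop the Horner chain at vc1, materialize vs = 2^n separately, and finish with vt = vt*vs; vf = vt*vp + vs. The avx512f "scalef" files instead run the chain one step further (the trailing vc0 FMAs), so that vp approximates exp(t) in full, and then apply the 2^n scale with a single instruction. A hedged sketch of that final AVX512F step (the helper name is ours, not the kernel's):

#include <immintrin.h>

/* vp ~= exp(t) (polynomial taken down to vc0), vn holds the
 * integer-valued n; scalef computes vp * 2^vn directly, with no
 * hand-built exponent bits. Requires AVX512F. */
static __m512 reconstruct_scalef(__m512 vp, __m512 vn) {
  return _mm512_scalef_ps(vp, vn);
}
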
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

avx512f-p5-scalef-x160-acc2.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2():
   91  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  102  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  114  __m512 vp9 = _mm512_fmadd_ps(vc5, vt9, vc4);
  125  vp9 = _mm512_fmadd_ps(vp9, vt9, vc3);
  136  vp9 = _mm512_fmadd_ps(vp9, vt9, vc2);
  147  vp9 = _mm512_fmadd_ps(vp9, vt9, vc1);
  158  vp9 = _mm512_fmadd_ps(vp9, vt9, vc0);

avx512f-p5-scalef-x160.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160():
   90  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  101  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  113  __m512 vp9 = _mm512_fmadd_ps(vc5, vt9, vc4);
  124  vp9 = _mm512_fmadd_ps(vp9, vt9, vc3);
  135  vp9 = _mm512_fmadd_ps(vp9, vt9, vc2);
  146  vp9 = _mm512_fmadd_ps(vp9, vt9, vc1);
  157  vp9 = _mm512_fmadd_ps(vp9, vt9, vc0);

avx2-p5-x80.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80():
  118  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  129  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  141  __m256 vp9 = _mm256_fmadd_ps(vc5, vt9, vc4);
  152  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  163  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  174  vp9 = _mm256_fmadd_ps(vp9, vt9, vc1);
  189  vt9 = _mm256_mul_ps(vt9, vs9);
  200  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

avx2-p5-x80-acc5.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5():
  122  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  133  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  145  __m256 vp9 = _mm256_fmadd_ps(vc5, vt9, vc4);
  156  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  167  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  178  vp9 = _mm256_fmadd_ps(vp9, vt9, vc1);
  193  vt9 = _mm256_mul_ps(vt9, vs9);
  204  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

avx2-p5-x80-acc2.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2():
  119  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  130  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  142  __m256 vp9 = _mm256_fmadd_ps(vc5, vt9, vc4);
  153  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  164  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  175  vp9 = _mm256_fmadd_ps(vp9, vt9, vc1);
  190  vt9 = _mm256_mul_ps(vt9, vs9);
  201  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

avx512f-p5-scalef-x160-acc5.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5():
   94  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  105  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  117  __m512 vp9 = _mm512_fmadd_ps(vc5, vt9, vc4);
  128  vp9 = _mm512_fmadd_ps(vp9, vt9, vc3);
  139  vp9 = _mm512_fmadd_ps(vp9, vt9, vc2);
  150  vp9 = _mm512_fmadd_ps(vp9, vt9, vc1);
  161  vp9 = _mm512_fmadd_ps(vp9, vt9, vc0);

avx2-p5-x96-acc6.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6():
  133  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  146  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  160  __m256 vp9 = _mm256_fmadd_ps(vc5, vt9, vc4);
  173  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  186  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  199  vp9 = _mm256_fmadd_ps(vp9, vt9, vc1);
  216  vt9 = _mm256_mul_ps(vt9, vs9);
  229  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

avx2-p5-x96-acc3.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3():
  130  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
  143  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  157  __m256 vp9 = _mm256_fmadd_ps(vc5, vt9, vc4);
  170  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  183  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  196  vp9 = _mm256_fmadd_ps(vp9, vt9, vc1);
  213  vt9 = _mm256_mul_ps(vt9, vs9);
  226  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

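The -acc2/-acc5/-acc6 suffixes in the radd* file names do not change any of the matched lines; they change how the resulting vf vectors are summed. The running sum is split across that many independent accumulators, so consecutive adds do not serialize on a single register, and the partials are reduced once at the end. A scalar stand-in for the idea:

#include <stddef.h>

/* Two-accumulator reduction sketch (the acc2 case). */
static float sum_acc2(const float* f, size_t n) {
  float vacc0 = 0.0f;
  float vacc1 = 0.0f;
  size_t i = 0;
  for (; i + 2 <= n; i += 2) {
    vacc0 += f[i];      /* independent dependency chains: these two */
    vacc1 += f[i + 1];  /* adds can issue in parallel               */
  }
  if (i < n) {
    vacc0 += f[i];
  }
  return vacc0 + vacc1;  /* final reduction of the partials */
}
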
/external/XNNPACK/src/f32-vscaleextexp/gen/

avx512f-p5-scalef-x160.c, in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160():
   79  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
   90  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  102  __m512 vp9 = _mm512_fmadd_ps(vc5, vt9, vc4);
  113  vp9 = _mm512_fmadd_ps(vp9, vt9, vc3);
  124  vp9 = _mm512_fmadd_ps(vp9, vt9, vc2);
  135  vp9 = _mm512_fmadd_ps(vp9, vt9, vc1);
  146  vp9 = _mm512_fmadd_ps(vp9, vt9, vc0);

avx512f-p5-scalef-x176.c, in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176():
   81  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
   93  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  106  __m512 vp9 = _mm512_fmadd_ps(vc5, vt9, vc4);
  118  vp9 = _mm512_fmadd_ps(vp9, vt9, vc3);
  130  vp9 = _mm512_fmadd_ps(vp9, vt9, vc2);
  142  vp9 = _mm512_fmadd_ps(vp9, vt9, vc1);
  154  vp9 = _mm512_fmadd_ps(vp9, vt9, vc0);

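Written out once, the formula that every FMA chain in this document implements is

  \exp(x) \approx 2^{n}\,p(t), \qquad n = \operatorname{round}(x \cdot \log_2 e), \qquad t = (x - n\,\ell_{hi}) - n\,\ell_{lo},
  p(t) = c_0 + t\,(c_1 + t\,(c_2 + t\,(c_3 + t\,(c_4 + t\,c_5)))),

where \ell_{hi} + \ell_{lo} = \ln 2 is the two-word split carried by vminus_ln2_hi and vminus_ln2_lo. The extexp kernels here, like the other avx512f listings, carry the Horner chain all the way to c_0; in the avx2 kernels the c_0 = 1 term is instead restored during reconstruction (the vt9 = vt9*vs9 and vf9 = vt9*vp9 + vs9 lines).
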
/external/XNNPACK/src/f32-raddextexp/gen/

avx512f-p5-scalef-x160.c, in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160():
   79  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
   90  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  102  __m512 vp9 = _mm512_fmadd_ps(vc5, vt9, vc4);
  113  vp9 = _mm512_fmadd_ps(vp9, vt9, vc3);
  124  vp9 = _mm512_fmadd_ps(vp9, vt9, vc2);
  135  vp9 = _mm512_fmadd_ps(vp9, vt9, vc1);
  146  vp9 = _mm512_fmadd_ps(vp9, vt9, vc0);

avx512f-p5-scalef-x160-acc2.c, in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2():
   81  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);  (local)
   92  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
  104  __m512 vp9 = _mm512_fmadd_ps(vc5, vt9, vc4);
  115  vp9 = _mm512_fmadd_ps(vp9, vt9, vc3);
  126  vp9 = _mm512_fmadd_ps(vp9, vt9, vc2);
  137  vp9 = _mm512_fmadd_ps(vp9, vt9, vc1);
  148  vp9 = _mm512_fmadd_ps(vp9, vt9, vc0);

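What sets the extexp kernels apart from the expminusmax families is not visible in these matches, since the reduction and polynomial are identical; it is that results are kept in an extended-exponent form, value = v * 2^e with e tracked separately, so sums of exponentials stay representable without first subtracting a running maximum. A hypothetical scalar illustration of accumulating in that form (names and structure are ours, not the kernel's):

#include <math.h>

/* Accumulate (term_v * 2^term_e) into (*acc_v * 2^*acc_e): rescale the
 * smaller-exponent operand onto the larger exponent, then add. */
static void extexp_add(float* acc_v, float* acc_e, float term_v, float term_e) {
  const float e = fmaxf(*acc_e, term_e);
  const float av = ldexpf(*acc_v, (int) (*acc_e - e));  /* shift is <= 0 */
  const float tv = ldexpf(term_v, (int) (term_e - e));
  *acc_v = av + tv;
  *acc_e = e;
}
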
/external/XNNPACK/src/f32-velu/gen/

velu-avx2-rr1-p6-x80.c, in xnn_f32_velu_ukernel__avx2_rr1_p6_x80():
  107  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2, vz9);  (local)
  118  __m256 vp9 = _mm256_fmadd_ps(vc6, vt9, vc5);
  129  vp9 = _mm256_fmadd_ps(vp9, vt9, vc4);
  140  vp9 = _mm256_fmadd_ps(vp9, vt9, vc3);
  151  vp9 = _mm256_fmadd_ps(vp9, vt9, vc2);
  171  vp9 = _mm256_mul_ps(vp9, vt9);
  172  vt9 = _mm256_mul_ps(vt9, vs9);
  193  vp9 = _mm256_fmadd_ps(vp9, vt9, vt9);

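The velu kernel shares the reduction-plus-Horner shape but computes alpha * (exp(x) - 1) for negative inputs, with a single-word ln2 reduction (rr1, one vminus_ln2 FMA instead of the hi/lo pair) and a degree-6 chain (vc6 down to vc2). Listing lines 171, 172, and 193 are the expm1-style reconstruction. A scalar sketch of that tail, assuming s = 2^n, the reduced t, and p = the vc6..vc2 Horner result are already in hand:

#include <math.h>

/* exp(x) - 1 = (s - 1) + s*(t + t^2*q(t)); the three steps below mirror
 * listing lines 171 (p *= t), 172 (t *= s), and 193 (p = p*t + t). */
static float elu_negative_tail(float s, float t, float p, float alpha) {
  p *= t;             /* p = t*q(t) */
  t *= s;             /* t = s*t */
  p = fmaf(p, t, t);  /* p = s*(t + t^2*q(t)) */
  return fmaf(p, alpha, alpha * (s - 1.0f));  /* alpha*(exp(x) - 1) */
}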