/external/XNNPACK/src/f32-vscaleextexp/gen/
avx512f-p5-scalef-x192.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192():
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (local)
   185  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
   186  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
   187  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
   188  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
   189  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
   190  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
   191  const __m512 ve6 = _mm512_add_ps(vn6, vscalee);
   192  const __m512 ve7 = _mm512_add_ps(vn7, vscalee);
   193  const __m512 ve8 = _mm512_add_ps(vn8, vscalee);
   [all …]

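Note: these matches are the extended-exponent scaling step shared by all of the f32 vscaleextexp microkernels. The caller supplies the scale's exponent component as scale_exp; it is broadcast once into vscalee (line 40 above), and each vector's integer output exponent vn from the exp() range reduction is shifted by it. In the avx512f-p5-scalef variants the combined exponent is then applied with AVX512F's scalef instruction, as the file names indicate. A minimal sketch of the pattern, with the helper name and the vx/vn operands assumed for illustration rather than taken from the kernel source:

    #include <immintrin.h>

    // Sketch: scale each lane of vx by 2^(vn + scale_exp), as in the
    // avx512f-p5-scalef kernels listed above. _mm512_scalef_ps computes
    // a * 2^floor(b) per lane, so no explicit exponent-bit manipulation
    // or overflow handling is needed here.
    static inline __m512 scale_by_ext_exp(__m512 vx, __m512 vn, float scale_exp) {
      const __m512 vscalee = _mm512_set1_ps(scale_exp);  // broadcast (line 40)
      const __m512 ve = _mm512_add_ps(vn, vscalee);      // the repeated ve0..veN adds
      return _mm512_scalef_ps(vx, ve);                   // apply 2^ve
    }
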
avx512f-p5-scalef-x176.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176():
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (local)
   175  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
   176  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
   177  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
   178  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
   179  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
   180  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
   181  const __m512 ve6 = _mm512_add_ps(vn6, vscalee);
   182  const __m512 ve7 = _mm512_add_ps(vn7, vscalee);
   183  const __m512 ve8 = _mm512_add_ps(vn8, vscalee);
   [all …]

avx512f-p5-scalef-x160.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160():
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (local)
   165  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
   166  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
   167  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
   168  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
   169  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
   170  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
   171  const __m512 ve6 = _mm512_add_ps(vn6, vscalee);
   172  const __m512 ve7 = _mm512_add_ps(vn7, vscalee);
   173  const __m512 ve8 = _mm512_add_ps(vn8, vscalee);
   [all …]

avx512f-p5-scalef-x144.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144():
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (local)
   155  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
   156  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
   157  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
   158  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
   159  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
   160  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
   161  const __m512 ve6 = _mm512_add_ps(vn6, vscalee);
   162  const __m512 ve7 = _mm512_add_ps(vn7, vscalee);
   163  const __m512 ve8 = _mm512_add_ps(vn8, vscalee);
   [all …]

avx512f-p5-scalef-x128.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128():
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (local)
   145  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
   146  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
   147  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
   148  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
   149  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
   150  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
   151  const __m512 ve6 = _mm512_add_ps(vn6, vscalee);
   152  const __m512 ve7 = _mm512_add_ps(vn7, vscalee);
   199  const __m512 ve = _mm512_add_ps(vn, vscalee);
   [all …]

avx512f-p5-scalef-x112.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112():
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (local)
   135  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
   136  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
   137  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
   138  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
   139  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
   140  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
   141  const __m512 ve6 = _mm512_add_ps(vn6, vscalee);
   186  const __m512 ve = _mm512_add_ps(vn, vscalee);
   220  const __m512 ve = _mm512_add_ps(vn, vscalee);

avx2-p5-x96.c | all matches in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96():
    46  const __m256 vscalee = _mm256_set1_ps(scale_exp);  (local)
   191  __m256 ve0 = _mm256_add_ps(vn0, vscalee);
   192  __m256 ve1 = _mm256_add_ps(vn1, vscalee);
   193  __m256 ve2 = _mm256_add_ps(vn2, vscalee);
   194  __m256 ve3 = _mm256_add_ps(vn3, vscalee);
   195  __m256 ve4 = _mm256_add_ps(vn4, vscalee);
   196  __m256 ve5 = _mm256_add_ps(vn5, vscalee);
   197  __m256 ve6 = _mm256_add_ps(vn6, vscalee);
   198  __m256 ve7 = _mm256_add_ps(vn7, vscalee);
   199  __m256 ve8 = _mm256_add_ps(vn8, vscalee);
   [all …]

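The avx2-p5 variants compute the same ve = vn + vscalee sums, but AVX2 has no scalef instruction, so a power of two 2^e must be materialized explicitly, typically by writing the biased exponent into the exponent field of an IEEE-754 single. A sketch of that fallback, assuming e is integral and already clamped to the normal range [-126, 127] (the generated kernels additionally handle exponents outside this range):

    #include <immintrin.h>

    // Sketch: multiply vx by 2^ve without AVX512 scalef. Assumes every lane
    // of ve is an integer in [-126, 127]; wider ranges need extra clamping
    // or a two-step multiply.
    static inline __m256 mul_by_pow2(__m256 vx, __m256 ve) {
      const __m256i vbias = _mm256_set1_epi32(127);    // binary32 exponent bias
      const __m256i vei = _mm256_cvtps_epi32(ve);      // exponent as int32
      const __m256i vbits = _mm256_slli_epi32(_mm256_add_epi32(vei, vbias), 23);
      return _mm256_mul_ps(vx, _mm256_castsi256_ps(vbits));  // vx * 2^ve
    }
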
avx512f-p5-scalef-x96.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96():
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (local)
   125  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
   126  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
   127  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
   128  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
   129  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
   130  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
   173  const __m512 ve = _mm512_add_ps(vn, vscalee);
   207  const __m512 ve = _mm512_add_ps(vn, vscalee);

avx2-p5-x88.c | all matches in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88():
    46  const __m256 vscalee = _mm256_set1_ps(scale_exp);  (local)
   181  __m256 ve0 = _mm256_add_ps(vn0, vscalee);
   182  __m256 ve1 = _mm256_add_ps(vn1, vscalee);
   183  __m256 ve2 = _mm256_add_ps(vn2, vscalee);
   184  __m256 ve3 = _mm256_add_ps(vn3, vscalee);
   185  __m256 ve4 = _mm256_add_ps(vn4, vscalee);
   186  __m256 ve5 = _mm256_add_ps(vn5, vscalee);
   187  __m256 ve6 = _mm256_add_ps(vn6, vscalee);
   188  __m256 ve7 = _mm256_add_ps(vn7, vscalee);
   189  __m256 ve8 = _mm256_add_ps(vn8, vscalee);
   [all …]

avx512f-p5-scalef-x80.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80():
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (local)
   115  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
   116  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
   117  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
   118  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
   119  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
   160  const __m512 ve = _mm512_add_ps(vn, vscalee);
   194  const __m512 ve = _mm512_add_ps(vn, vscalee);

avx2-p5-x80.c | all matches in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80():
    46  const __m256 vscalee = _mm256_set1_ps(scale_exp);  (local)
   171  __m256 ve0 = _mm256_add_ps(vn0, vscalee);
   172  __m256 ve1 = _mm256_add_ps(vn1, vscalee);
   173  __m256 ve2 = _mm256_add_ps(vn2, vscalee);
   174  __m256 ve3 = _mm256_add_ps(vn3, vscalee);
   175  __m256 ve4 = _mm256_add_ps(vn4, vscalee);
   176  __m256 ve5 = _mm256_add_ps(vn5, vscalee);
   177  __m256 ve6 = _mm256_add_ps(vn6, vscalee);
   178  __m256 ve7 = _mm256_add_ps(vn7, vscalee);
   179  __m256 ve8 = _mm256_add_ps(vn8, vscalee);
   [all …]

avx2-p5-x72.c | all matches in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72():
    46  const __m256 vscalee = _mm256_set1_ps(scale_exp);  (local)
   161  __m256 ve0 = _mm256_add_ps(vn0, vscalee);
   162  __m256 ve1 = _mm256_add_ps(vn1, vscalee);
   163  __m256 ve2 = _mm256_add_ps(vn2, vscalee);
   164  __m256 ve3 = _mm256_add_ps(vn3, vscalee);
   165  __m256 ve4 = _mm256_add_ps(vn4, vscalee);
   166  __m256 ve5 = _mm256_add_ps(vn5, vscalee);
   167  __m256 ve6 = _mm256_add_ps(vn6, vscalee);
   168  __m256 ve7 = _mm256_add_ps(vn7, vscalee);
   169  __m256 ve8 = _mm256_add_ps(vn8, vscalee);
   [all …]

avx2-p5-x56.c | all matches in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56():
    46  const __m256 vscalee = _mm256_set1_ps(scale_exp);  (local)
   141  __m256 ve0 = _mm256_add_ps(vn0, vscalee);
   142  __m256 ve1 = _mm256_add_ps(vn1, vscalee);
   143  __m256 ve2 = _mm256_add_ps(vn2, vscalee);
   144  __m256 ve3 = _mm256_add_ps(vn3, vscalee);
   145  __m256 ve4 = _mm256_add_ps(vn4, vscalee);
   146  __m256 ve5 = _mm256_add_ps(vn5, vscalee);
   147  __m256 ve6 = _mm256_add_ps(vn6, vscalee);
   214  __m256 ve = _mm256_add_ps(vn, vscalee);
   254  __m256 ve = _mm256_add_ps(vn, vscalee);

avx512f-p5-scalef-x64.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64():
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (local)
   105  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
   106  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
   107  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
   108  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
   147  const __m512 ve = _mm512_add_ps(vn, vscalee);
   181  const __m512 ve = _mm512_add_ps(vn, vscalee);

avx2-p5-x64.c | all matches in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64():
    46  const __m256 vscalee = _mm256_set1_ps(scale_exp);  (local)
   151  __m256 ve0 = _mm256_add_ps(vn0, vscalee);
   152  __m256 ve1 = _mm256_add_ps(vn1, vscalee);
   153  __m256 ve2 = _mm256_add_ps(vn2, vscalee);
   154  __m256 ve3 = _mm256_add_ps(vn3, vscalee);
   155  __m256 ve4 = _mm256_add_ps(vn4, vscalee);
   156  __m256 ve5 = _mm256_add_ps(vn5, vscalee);
   157  __m256 ve6 = _mm256_add_ps(vn6, vscalee);
   158  __m256 ve7 = _mm256_add_ps(vn7, vscalee);
   229  __m256 ve = _mm256_add_ps(vn, vscalee);
   [all …]

avx2-p5-x48.c | all matches in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48():
    46  const __m256 vscalee = _mm256_set1_ps(scale_exp);  (local)
   131  __m256 ve0 = _mm256_add_ps(vn0, vscalee);
   132  __m256 ve1 = _mm256_add_ps(vn1, vscalee);
   133  __m256 ve2 = _mm256_add_ps(vn2, vscalee);
   134  __m256 ve3 = _mm256_add_ps(vn3, vscalee);
   135  __m256 ve4 = _mm256_add_ps(vn4, vscalee);
   136  __m256 ve5 = _mm256_add_ps(vn5, vscalee);
   199  __m256 ve = _mm256_add_ps(vn, vscalee);
   239  __m256 ve = _mm256_add_ps(vn, vscalee);

avx512f-p5-scalef-x48.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x48():
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (local)
    95  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
    96  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
    97  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
   134  const __m512 ve = _mm512_add_ps(vn, vscalee);
   168  const __m512 ve = _mm512_add_ps(vn, vscalee);

avx2-p5-x40.c | all matches in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40():
    46  const __m256 vscalee = _mm256_set1_ps(scale_exp);  (local)
   121  __m256 ve0 = _mm256_add_ps(vn0, vscalee);
   122  __m256 ve1 = _mm256_add_ps(vn1, vscalee);
   123  __m256 ve2 = _mm256_add_ps(vn2, vscalee);
   124  __m256 ve3 = _mm256_add_ps(vn3, vscalee);
   125  __m256 ve4 = _mm256_add_ps(vn4, vscalee);
   184  __m256 ve = _mm256_add_ps(vn, vscalee);
   224  __m256 ve = _mm256_add_ps(vn, vscalee);

avx512f-p5-scalef-x32.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32():
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (local)
    85  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
    86  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
   121  const __m512 ve = _mm512_add_ps(vn, vscalee);
   155  const __m512 ve = _mm512_add_ps(vn, vscalee);

avx2-p5-x32.c | all matches in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32():
    46  const __m256 vscalee = _mm256_set1_ps(scale_exp);  (local)
   111  __m256 ve0 = _mm256_add_ps(vn0, vscalee);
   112  __m256 ve1 = _mm256_add_ps(vn1, vscalee);
   113  __m256 ve2 = _mm256_add_ps(vn2, vscalee);
   114  __m256 ve3 = _mm256_add_ps(vn3, vscalee);
   169  __m256 ve = _mm256_add_ps(vn, vscalee);
   209  __m256 ve = _mm256_add_ps(vn, vscalee);

avx2-p5-x24.c | all matches in xnn_f32_vscaleextexp_ukernel__avx2_p5_x24():
    46  const __m256 vscalee = _mm256_set1_ps(scale_exp);  (local)
   101  __m256 ve0 = _mm256_add_ps(vn0, vscalee);
   102  __m256 ve1 = _mm256_add_ps(vn1, vscalee);
   103  __m256 ve2 = _mm256_add_ps(vn2, vscalee);
   154  __m256 ve = _mm256_add_ps(vn, vscalee);
   194  __m256 ve = _mm256_add_ps(vn, vscalee);

avx512f-p5-scalef-x16.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16():
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (local)
    75  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
   108  const __m512 ve = _mm512_add_ps(vn, vscalee);
   142  const __m512 ve = _mm512_add_ps(vn, vscalee);

avx2-p5-x16.c | all matches in xnn_f32_vscaleextexp_ukernel__avx2_p5_x16():
    46  const __m256 vscalee = _mm256_set1_ps(scale_exp);  (local)
    91  __m256 ve0 = _mm256_add_ps(vn0, vscalee);
    92  __m256 ve1 = _mm256_add_ps(vn1, vscalee);
   139  __m256 ve = _mm256_add_ps(vn, vscalee);
   179  __m256 ve = _mm256_add_ps(vn, vscalee);

avx2-p5-x8.c | all matches in xnn_f32_vscaleextexp_ukernel__avx2_p5_x8():
    46  const __m256 vscalee = _mm256_set1_ps(scale_exp);  (local)
    81  __m256 ve0 = _mm256_add_ps(vn0, vscalee);
   124  __m256 ve = _mm256_add_ps(vn, vscalee);
   164  __m256 ve = _mm256_add_ps(vn, vscalee);

/external/XNNPACK/src/f32-vscaleextexp/
avx512f-p5-scalef.c.in:
    40  const __m512 vscalee = _mm512_set1_ps(scale_exp);  (variable)
    87  const __m512 ve${N} = _mm512_add_ps(vn${N}, vscalee);
   122  const __m512 ve = _mm512_add_ps(vn, vscalee);
   156  const __m512 ve = _mm512_add_ps(vn, vscalee);

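The gen/ files above are produced from this template: the generator expands each ${N} line once per vector of the tile, which is where the unrolled ve0, ve1, ... statements come from. For example, a 32-element tile (two 16-lane __m512 vectors) expands template line 87 into exactly the two statements listed for avx512f-p5-scalef-x32.c:

    const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
    const __m512 ve1 = _mm512_add_ps(vn1, vscalee);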