/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx-rr2-p5-nr2-x80.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80():
     39  const __m256 vtwo = _mm256_set1_ps(2.0f);   (local)
    229  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    230  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    231  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    232  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    233  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    234  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    235  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    236  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    237  vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4)));
    [all …]
|
D | avx-rr2-p5-nr2-x72.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72():
     39  const __m256 vtwo = _mm256_set1_ps(2.0f);   (local)
    212  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    213  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    214  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    215  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    216  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    217  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    218  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    219  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    220  vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4)));
    [all …]
|
D | avx-rr2-p5-nr2-x64.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64():
     39  const __m256 vtwo = _mm256_set1_ps(2.0f);   (local)
    195  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    196  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    197  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    198  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    199  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    200  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    201  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    202  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    203  vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4)));
    [all …]
|
D | avx-rr2-p5-nr2-x56.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56():
     39  const __m256 vtwo = _mm256_set1_ps(2.0f);   (local)
    178  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    179  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    180  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    181  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    182  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    183  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    184  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    185  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    186  vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4)));
    [all …]
|
D | avx-rr2-p5-nr2-x40.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40():
     39  const __m256 vtwo = _mm256_set1_ps(2.0f);   (local)
    144  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    145  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    146  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    147  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    148  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    149  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    150  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    151  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    152  vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4)));
    [all …]
|
D | avx-rr2-p5-nr2-x48.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48():
     39  const __m256 vtwo = _mm256_set1_ps(2.0f);   (local)
    161  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    162  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    163  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    164  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    165  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    166  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    167  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    168  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    169  vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4)));
    [all …]
|
D | avx-rr2-p5-nr2-x32.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32():
     39  const __m256 vtwo = _mm256_set1_ps(2.0f);   (local)
    127  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    128  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    129  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    130  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    131  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    132  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    133  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    134  vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
    184  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
    [all …]
|
D | avx-rr2-p5-nr2-x24.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24():
     39  const __m256 vtwo = _mm256_set1_ps(2.0f);   (local)
    110  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    111  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    112  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    113  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    114  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    115  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    161  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
    162  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
    200  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
    [all …]
|
D | avx-rr2-p5-nr2-x16.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16():
     39  const __m256 vtwo = _mm256_set1_ps(2.0f);   (local)
     93  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
     94  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
     95  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
     96  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    138  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
    139  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
    177  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
    178  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
|
D | avx-rr2-p5-nr2-x8.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8():
     39  const __m256 vtwo = _mm256_set1_ps(2.0f);   (local)
     69  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
     70  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
    108  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
    109  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
|
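Every generated kernel above (x8 through x80, the suffix being the number of floats processed per loop iteration, i.e. 1 to 10 __m256 registers) uses vtwo the same way: vd holds the denominator of the sigmoid, vr holds an approximate reciprocal of it, and each repeated line vr = vr * (2 - vr * vd) is one Newton-Raphson refinement step toward 1/vd; the "nr2" variants apply it twice per register. A minimal sketch of that refinement, assuming the initial estimate comes from _mm256_rcp_ps (the listing does not show the seeding instruction, so that part is an assumption):

    #include <immintrin.h>

    // Sketch only, not the XNNPACK kernel itself: the two Newton-Raphson
    // steps r <- r * (2 - r * d) that each x8..x80 kernel applies per register.
    // The _mm256_rcp_ps seed is an assumption; the listing omits that line.
    static inline __m256 reciprocal_nr2(__m256 vd) {
      const __m256 vtwo = _mm256_set1_ps(2.0f);
      __m256 vr = _mm256_rcp_ps(vd);                                       // ~12-bit estimate of 1/vd
      vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));  // first refinement step
      vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));  // second refinement step
      return vr;
    }

Each step roughly squares the relative error of the estimate, so two steps bring the coarse rcpps approximation close to full single-precision accuracy; this is why the ukernels use nr2 while some of the reference versions below stop at nr1.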
/external/XNNPACK/src/math/ |
D | sigmoid-avx-rr2-p5-nr2.c | in xnn_math_f32_sigmoid__avx_rr2_p5_nr2():
     37  const __m256 vtwo = _mm256_set1_ps(2.0f);   (local)
     97  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
     98  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
|
D | sigmoid-avx-rr2-p5-nr1.c | in xnn_math_f32_sigmoid__avx_rr2_p5_nr1():
     37  const __m256 vtwo = _mm256_set1_ps(2.0f);   (local)
     97  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
|
D | sigmoid-sse2-rr2-p5-nr1.c | in xnn_math_f32_sigmoid__sse2_rr2_p5_nr1():
     37  const __m128 vtwo = _mm_set1_ps(2.0f);   (local)
     95  vr = _mm_mul_ps(vr, _mm_sub_ps(vtwo, _mm_mul_ps(vr, vd)));
|
D | sigmoid-sse2-rr2-lut64-p2-nr1.c | in xnn_math_f32_sigmoid__sse2_rr2_lut64_p2_nr1():
     38  const __m128 vtwo = _mm_set1_ps(2.0f);   (local)
    125  vr = _mm_mul_ps(vr, _mm_sub_ps(vtwo, _mm_mul_ps(vr, vd)));
|
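The files under src/math/ appear to be standalone reference/accuracy-evaluation versions of the same approximation. The nr1 versus nr2 suffix is simply the number of Newton-Raphson steps applied to the reciprocal, and the sse2-* files perform the identical update on __m128 vectors with _mm_* intrinsics. A one-step SSE sketch for comparison (again assuming an _mm_rcp_ps seed, which the listing does not show):

    #include <immintrin.h>

    // Sketch of the *-nr1 flavour: a single r <- r * (2 - r * d) step,
    // trading some accuracy of 1/vd for a shorter dependency chain.
    static inline __m128 reciprocal_nr1(__m128 vd) {
      const __m128 vtwo = _mm_set1_ps(2.0f);
      __m128 vr = _mm_rcp_ps(vd);                                  // rough estimate of 1/vd
      vr = _mm_mul_ps(vr, _mm_sub_ps(vtwo, _mm_mul_ps(vr, vd)));   // single refinement step
      return vr;
    }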
/external/XNNPACK/src/f32-sigmoid/ |
D | avx-p5.c.in |
     45  const __m256 vtwo = _mm256_set1_ps(2.0f);
    108  …vr${ABC[N]} = _mm256_mul_ps(vr${ABC[N]}, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr${ABC[N]}, vd${ABC[N]…
    109  …vr${ABC[N]} = _mm256_mul_ps(vr${ABC[N]}, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr${ABC[N]}, vd${ABC[N]…
    158  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
    159  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
    203  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
    204  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
|
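avx-p5.c.in is the template the x8..x80 kernels above are generated from: lines 108-109 emit the two refinement steps once per unrolled register (presumably inside a loop over N), with ${ABC[N]} expanding to the register suffix (0, 1, 2, ...), while the un-suffixed lines 158-159 and 203-204 cover the remainder paths. For an unroll of two registers the main-loop body expands to exactly the shape seen at lines 93-96 of avx-rr2-p5-nr2-x16.c above:

    // Illustrative expansion of the template lines 108-109 for N = 0, 1
    // (fragment only; matches the generated x16 kernel, not new code).
    vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));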