Home
last modified time | relevance | path

Searched refs:vr1 (Results 1 – 25 of 59) sorted by relevance

123

/external/XNNPACK/src/f32-sigmoid/gen/
Davx2-rr1-p5-nr2fma-x16.c82 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16() local
85 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16()
88 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16()
91 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16()
Davx2-rr1-p5-nr2fma-x24.c95 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24() local
99 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24()
103 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24()
107 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24()
Davx2-rr1-p5-nr2fma-x32.c108 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32() local
113 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
118 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
123 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
Davx2-rr1-p5-nr2fma-x40.c121 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40() local
127 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
133 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
139 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
Davx-rr2-p5-nr2-x16.c91 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16() local
95 vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16()
96 vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16()
99 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x32.c79 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32() local
82 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32()
85 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32()
Davx512f-rr1-p5-scalef-nr1fma-x32.c76 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32() local
79 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32()
82 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x32.c85 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32() local
88 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32()
91 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32()
Davx2-rr1-p5-nr2fma-x48.c134 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48() local
141 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
148 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
155 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
Davx2-rr1-p5-nr1fma-x16.c82 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16() local
85 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16()
89 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16()
Davx-rr2-p5-nr2-x24.c107 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24() local
112 vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24()
113 vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24()
118 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x48.c97 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48() local
101 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48()
105 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48()
Davx512f-rr1-p5-scalef-nr1fma-x48.c88 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48() local
92 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48()
96 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x48.c91 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48() local
95 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48()
99 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48()
Davx2-rr1-p5-nr1fma-x24.c95 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24() local
99 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24()
104 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24()
Davx512f-rr1-p5-scalef-nr1fma-x64.c100 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64() local
105 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64()
110 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64()
Davx2-rr1-p5-nr2fma-x56.c147 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56() local
155 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
163 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
171 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
Davx-rr2-p5-nr2-x32.c123 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32() local
129 vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32()
130 vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32()
137 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x64.c109 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64() local
114 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64()
119 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x64.c103 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64() local
108 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64()
113 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64()
Davx2-rr1-p5-nr2fma-x64.c160 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() local
169 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
178 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
187 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
Davx2-rr1-p5-nr2fma-x72.c173 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() local
183 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
193 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
203 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x80.c115 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() local
121 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
127 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
Davx512f-rr1-p5-scalef-nr1fma-x80.c112 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80() local
118 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80()
124 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80()
Davx2-rr1-p5-nr1fma-x32.c108 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32() local
113 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32()
119 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32()

123