Searched for refs:vr1 (results 1–25 of 81, sorted by relevance)


/external/XNNPACK/src/f32-vsigmoid/gen/
vsigmoid-avx2-rr1-p5-nr2fma-x16.c (in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x16):
  80: __m256 vr1 = _mm256_rcp_ps(vd1);  [local]
  83: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  86: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  89: __m256 vf1 = _mm256_mul_ps(ve1, vr1);
vsigmoid-avx2-rr1-p5-nr2fma-x24.c (in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x24):
  93: __m256 vr1 = _mm256_rcp_ps(vd1);  [local]
  97: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  101: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  105: __m256 vf1 = _mm256_mul_ps(ve1, vr1);
vsigmoid-avx2-rr1-p5-nr2fma-x32.c (in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32):
  106: __m256 vr1 = _mm256_rcp_ps(vd1);  [local]
  111: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  116: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  121: __m256 vf1 = _mm256_mul_ps(ve1, vr1);
vsigmoid-avx2-rr1-p5-nr2fma-x40.c (in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40):
  119: __m256 vr1 = _mm256_rcp_ps(vd1);  [local]
  125: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  131: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  137: __m256 vf1 = _mm256_mul_ps(ve1, vr1);
vsigmoid-avx-rr2-p5-nr2-x16.c (in xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x16):
  89: __m256 vr1 = _mm256_rcp_ps(vd1);  [local]
  93: vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
  94: vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
  97: __m256 vf1 = _mm256_mul_ps(ve1, vr1);
vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x32.c (in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32):
  77: __m512 vr1 = _mm512_rcp14_ps(vd1);  [local]
  80: vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  83: __m512 vf1 = _mm512_mul_ps(ve1, vr1);
vsigmoid-avx512f-rr1-p5-scalef-nr1fma-x32.c (in xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32):
  76: __m512 vr1 = _mm512_rcp14_ps(vd1);  [local]
  79: vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  82: __m512 vf1 = _mm512_mul_ps(ve1, vr1);
vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x32.c (in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32):
  75: __m512 vr1 = _mm512_rcp14_ps(vd1);  [local]
  78: vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  81: __m512 vf1 = _mm512_mul_ps(ve1, vr1);
vsigmoid-avx2-rr1-p5-nr2fma-x48.c (in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x48):
  132: __m256 vr1 = _mm256_rcp_ps(vd1);  [local]
  139: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  146: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  153: __m256 vf1 = _mm256_mul_ps(ve1, vr1);
vsigmoid-avx512f-rr1-p5-scalef-nr1fma-x48.c (in xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48):
  88: __m512 vr1 = _mm512_rcp14_ps(vd1);  [local]
  92: vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  96: __m512 vf1 = _mm512_mul_ps(ve1, vr1);
vsigmoid-avx2-rr1-p5-nr1fma-x16.c (in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x16):
  80: __m256 vr1 = _mm256_rcp_ps(vd1);  [local]
  83: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  87: __m256 vf1 = _mm256_mul_ps(ve1, vr1);
vsigmoid-avx2-rr1-p5-nr2fma-x56.c (in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x56):
  145: __m256 vr1 = _mm256_rcp_ps(vd1);  [local]
  153: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  161: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  169: __m256 vf1 = _mm256_mul_ps(ve1, vr1);
vsigmoid-avx-rr2-p5-nr2-x24.c (in xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x24):
  105: __m256 vr1 = _mm256_rcp_ps(vd1);  [local]
  110: vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
  111: vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
  116: __m256 vf1 = _mm256_mul_ps(ve1, vr1);
vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x48.c (in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48):
  89: __m512 vr1 = _mm512_rcp14_ps(vd1);  [local]
  93: vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  97: __m512 vf1 = _mm512_mul_ps(ve1, vr1);
vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x48.c (in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48):
  87: __m512 vr1 = _mm512_rcp14_ps(vd1);  [local]
  91: vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  95: __m512 vf1 = _mm512_mul_ps(ve1, vr1);
vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x64.c (in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64):
  101: __m512 vr1 = _mm512_rcp14_ps(vd1);  [local]
  106: vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  111: __m512 vf1 = _mm512_mul_ps(ve1, vr1);
vsigmoid-avx512f-rr1-p5-scalef-nr1fma-x64.c (in xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64):
  100: __m512 vr1 = _mm512_rcp14_ps(vd1);  [local]
  105: vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  110: __m512 vf1 = _mm512_mul_ps(ve1, vr1);
vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x64.c (in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64):
  99: __m512 vr1 = _mm512_rcp14_ps(vd1);  [local]
  104: vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  109: __m512 vf1 = _mm512_mul_ps(ve1, vr1);
vsigmoid-avx2-rr1-p5-nr1fma-x24.c (in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x24):
  93: __m256 vr1 = _mm256_rcp_ps(vd1);  [local]
  97: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  102: __m256 vf1 = _mm256_mul_ps(ve1, vr1);
vsigmoid-avx2-rr1-p5-nr2fma-x64.c (in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x64):
  158: __m256 vr1 = _mm256_rcp_ps(vd1);  [local]
  167: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  176: vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
  185: __m256 vf1 = _mm256_mul_ps(ve1, vr1);
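
All of these f32-vsigmoid hits share one pattern: the sigmoid denominator vd1 is inverted with a hardware reciprocal estimate (_mm256_rcp_ps, or _mm512_rcp14_ps in the avx512f kernels) and then sharpened by Newton-Raphson steps before the final multiply into vf1. Below is a minimal standalone sketch of the two refinement forms visible in the snippets above; the helper names are ours, not XNNPACK identifiers.

#include <immintrin.h>

/*
 * One Newton-Raphson step: given vd and an estimate vr ~= 1/vd,
 * compute vr' = vr + vr*(1 - vr*vd), which roughly doubles the
 * number of correct mantissa bits.
 */

/* FMA form, as in the avx2/avx512f nr1fma and nr2fma kernels.
   _mm256_fnmadd_ps(a, b, c) computes c - a*b, i.e. 1 - vr*vd here. */
static inline __m256 rcp_refine_fma(__m256 vd, __m256 vr) {
  const __m256 vone = _mm256_set1_ps(1.0f);
  return _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);
}

/* Multiply-only form, as in the avx rr2-p5-nr2 kernels; the same step
   rewritten as vr' = vr*(2 - vr*vd) for targets without FMA. */
static inline __m256 rcp_refine_mul(__m256 vd, __m256 vr) {
  const __m256 vtwo = _mm256_set1_ps(2.0f);
  return _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
}

This also accounts for the differing step counts: the avx512f nr1fma kernels start from the ~14-bit _mm512_rcp14_ps estimate, so a single step already reaches full float32 precision, while the avx/avx2 kernels start from the ~12-bit _mm256_rcp_ps estimate and run one or two steps.
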
/external/XNNPACK/src/cs16-vsquareabs/gen/
scalar-x2.c (in xnn_cs16_vsquareabs_ukernel__scalar_x2):
  30: const int32_t vr1 = (int32_t) input[2];  [local]
  36: const uint32_t vrsquare1 = (uint32_t) (vr1 * vr1);
scalar-x3.c (in xnn_cs16_vsquareabs_ukernel__scalar_x3):
  30: const int32_t vr1 = (int32_t) input[2];  [local]
  38: const uint32_t vrsquare1 = (uint32_t) (vr1 * vr1);
scalar-x4.c (in xnn_cs16_vsquareabs_ukernel__scalar_x4):
  30: const int32_t vr1 = (int32_t) input[2];  [local]
  40: const uint32_t vrsquare1 = (uint32_t) (vr1 * vr1);
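
These cs16-vsquareabs hits show only the real-part lines; each scalar ukernel also squares the matching imaginary part and sums the two into the squared magnitude. Below is a hedged sketch of the per-sample computation on interleaved complex int16 input; the loop structure and names are illustrative, not the XNNPACK source.

#include <stddef.h>
#include <stdint.h>

/* Squared magnitude |re + i*im|^2 = re*re + im*im for interleaved
   complex int16 samples. Each square fits in 31 bits (32768^2 = 2^30),
   so the squares are widened to uint32 before summing, as in the
   vrsquare1 lines above, to avoid signed overflow. */
static void cs16_vsquareabs_scalar(const int16_t* input, uint32_t* output, size_t n) {
  for (size_t i = 0; i < n; i++) {
    const int32_t vr = (int32_t) input[2 * i];      /* real part */
    const int32_t vi = (int32_t) input[2 * i + 1];  /* imaginary part */
    const uint32_t vrsquare = (uint32_t) (vr * vr);
    const uint32_t visquare = (uint32_t) (vi * vi);
    output[i] = vrsquare + visquare;
  }
}
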
/external/XNNPACK/src/f16-vsigmoid/gen/
vsigmoid-neonfp16arith-rr2-p2-nr1fma-x16.c (in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x16):
  72: float16x8_t vr1 = vrecpeq_f16(vd1);  [local]
  75: const float16x8_t vadj1 = vfmsq_f16(vone, vr1, vd1);
  78: vr1 = vfmaq_f16(vr1, vr1, vadj1);
  81: float16x8_t vf1 = vmulq_f16(ve1, vr1);
vsigmoid-neonfp16arith-rr2-p2-nr1fma-x24.c (in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x24):
  83: float16x8_t vr1 = vrecpeq_f16(vd1);  [local]
  87: const float16x8_t vadj1 = vfmsq_f16(vone, vr1, vd1);
  91: vr1 = vfmaq_f16(vr1, vr1, vadj1);
  95: float16x8_t vf1 = vmulq_f16(ve1, vr1);
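
The f16 kernels apply the same one-step Newton-Raphson refinement with NEON fp16 arithmetic: vadj1 = 1 - vr1*vd1 via vfmsq_f16 (which computes a - b*c), then vr1 += vr1*vadj1 via vfmaq_f16. A minimal sketch follows, assuming an ARMv8.2-A target built with +fp16 support; the helper name is ours.

#include <arm_neon.h>

/* One Newton-Raphson step on a vrecpeq_f16 reciprocal estimate:
   vadj = 1 - vr*vd, then vr' = vr + vr*vadj ~= 1/vd. */
static inline float16x8_t rcp_refine_f16(float16x8_t vd, float16x8_t vr) {
  const float16x8_t vone = vmovq_n_f16(1.0f);
  const float16x8_t vadj = vfmsq_f16(vone, vr, vd);  /* 1 - vr*vd */
  return vfmaq_f16(vr, vr, vadj);                    /* vr + vr*vadj */
}
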
