Search hits for vr1, a local variable in each of the microkernels below (source line numbers on the left):

/external/XNNPACK/src/f32-vsigmoid/gen/

vsigmoid-avx2-rr1-p5-nr2fma-x16.c, in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x16():
    80   __m256 vr1 = _mm256_rcp_ps(vd1);
    83   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
    86   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
    89   __m256 vf1 = _mm256_mul_ps(ve1, vr1);

vsigmoid-avx2-rr1-p5-nr2fma-x24.c, in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x24():
    93   __m256 vr1 = _mm256_rcp_ps(vd1);
    97   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   101   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   105   __m256 vf1 = _mm256_mul_ps(ve1, vr1);

vsigmoid-avx2-rr1-p5-nr2fma-x32.c, in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32():
   106   __m256 vr1 = _mm256_rcp_ps(vd1);
   111   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   116   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   121   __m256 vf1 = _mm256_mul_ps(ve1, vr1);

vsigmoid-avx2-rr1-p5-nr2fma-x40.c, in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40():
   119   __m256 vr1 = _mm256_rcp_ps(vd1);
   125   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   131   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   137   __m256 vf1 = _mm256_mul_ps(ve1, vr1);

vsigmoid-avx-rr2-p5-nr2-x16.c, in xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x16():
    89   __m256 vr1 = _mm256_rcp_ps(vd1);
    93   vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    94   vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    97   __m256 vf1 = _mm256_mul_ps(ve1, vr1);

vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x32.c, in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32():
    77   __m512 vr1 = _mm512_rcp14_ps(vd1);
    80   vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
    83   __m512 vf1 = _mm512_mul_ps(ve1, vr1);

vsigmoid-avx512f-rr1-p5-scalef-nr1fma-x32.c, in xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32():
    76   __m512 vr1 = _mm512_rcp14_ps(vd1);
    79   vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
    82   __m512 vf1 = _mm512_mul_ps(ve1, vr1);

vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x32.c, in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32():
    75   __m512 vr1 = _mm512_rcp14_ps(vd1);
    78   vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
    81   __m512 vf1 = _mm512_mul_ps(ve1, vr1);

vsigmoid-avx2-rr1-p5-nr2fma-x48.c, in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x48():
   132   __m256 vr1 = _mm256_rcp_ps(vd1);
   139   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   146   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   153   __m256 vf1 = _mm256_mul_ps(ve1, vr1);

vsigmoid-avx512f-rr1-p5-scalef-nr1fma-x48.c, in xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48():
    88   __m512 vr1 = _mm512_rcp14_ps(vd1);
    92   vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
    96   __m512 vf1 = _mm512_mul_ps(ve1, vr1);

vsigmoid-avx2-rr1-p5-nr1fma-x16.c, in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x16():
    80   __m256 vr1 = _mm256_rcp_ps(vd1);
    83   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
    87   __m256 vf1 = _mm256_mul_ps(ve1, vr1);

vsigmoid-avx2-rr1-p5-nr2fma-x56.c, in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x56():
   145   __m256 vr1 = _mm256_rcp_ps(vd1);
   153   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   161   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   169   __m256 vf1 = _mm256_mul_ps(ve1, vr1);

vsigmoid-avx-rr2-p5-nr2-x24.c, in xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x24():
   105   __m256 vr1 = _mm256_rcp_ps(vd1);
   110   vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
   111   vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
   116   __m256 vf1 = _mm256_mul_ps(ve1, vr1);

vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x48.c, in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48():
    89   __m512 vr1 = _mm512_rcp14_ps(vd1);
    93   vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
    97   __m512 vf1 = _mm512_mul_ps(ve1, vr1);

vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x48.c, in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48():
    87   __m512 vr1 = _mm512_rcp14_ps(vd1);
    91   vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
    95   __m512 vf1 = _mm512_mul_ps(ve1, vr1);

vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x64.c, in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64():
   101   __m512 vr1 = _mm512_rcp14_ps(vd1);
   106   vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   111   __m512 vf1 = _mm512_mul_ps(ve1, vr1);

vsigmoid-avx512f-rr1-p5-scalef-nr1fma-x64.c, in xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64():
   100   __m512 vr1 = _mm512_rcp14_ps(vd1);
   105   vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   110   __m512 vf1 = _mm512_mul_ps(ve1, vr1);

vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x64.c, in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64():
    99   __m512 vr1 = _mm512_rcp14_ps(vd1);
   104   vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   109   __m512 vf1 = _mm512_mul_ps(ve1, vr1);

vsigmoid-avx2-rr1-p5-nr1fma-x24.c, in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x24():
    93   __m256 vr1 = _mm256_rcp_ps(vd1);
    97   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   102   __m256 vf1 = _mm256_mul_ps(ve1, vr1);

vsigmoid-avx2-rr1-p5-nr2fma-x64.c, in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x64():
   158   __m256 vr1 = _mm256_rcp_ps(vd1);
   167   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   176   vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
   185   __m256 vf1 = _mm256_mul_ps(ve1, vr1);

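All of the f32 hits above are the same division-free sigmoid tail. The kernel holds a denominator vd1 (roughly e + 1 for its exponential term e) and, instead of dividing, takes a hardware reciprocal estimate and refines it with Newton-Raphson: _mm256_rcp_ps is accurate to about 12 bits, so the AVX2 nr2fma kernels run two FMA steps (the nr1fma variants settle for one, presumably trading a little accuracy for speed), while _mm512_rcp14_ps gives about 14 bits and a single step suffices. With FMA a step is r <- r + r*(1 - r*d); the pre-FMA AVX nr2 kernels use the algebraically equivalent r <- r*(2 - r*d). A minimal standalone sketch in C; the function names nr_reciprocal_avx2 and nr_reciprocal_avx are illustrative, not XNNPACK symbols:

    #include <immintrin.h>

    /* Newton-Raphson reciprocal refinement as in the nr2fma kernels above.
       Each step roughly doubles the number of correct bits:
         r' = r + r*(1 - r*d)
       Hypothetical name; requires AVX2+FMA. */
    static inline __m256 nr_reciprocal_avx2(__m256 vd) {
      const __m256 vone = _mm256_set1_ps(1.0f);
      __m256 vr = _mm256_rcp_ps(vd);  /* ~12-bit estimate of 1/d */
      vr = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);  /* step 1 */
      vr = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);  /* step 2 */
      return vr;
    }

    /* FMA-free form used by the AVX nr2 kernels: r' = r*(2 - r*d). */
    static inline __m256 nr_reciprocal_avx(__m256 vd) {
      const __m256 vtwo = _mm256_set1_ps(2.0f);
      __m256 vr = _mm256_rcp_ps(vd);
      vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
      vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
      return vr;
    }

The last line of every match, vf1 = ve1 * vr1, then produces the sigmoid value e/d with the division replaced by a multiply.
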
/external/XNNPACK/src/cs16-vsquareabs/gen/

scalar-x2.c, in xnn_cs16_vsquareabs_ukernel__scalar_x2():
    30   const int32_t vr1 = (int32_t) input[2];
    36   const uint32_t vrsquare1 = (uint32_t) (vr1 * vr1);

scalar-x3.c, in xnn_cs16_vsquareabs_ukernel__scalar_x3():
    30   const int32_t vr1 = (int32_t) input[2];
    38   const uint32_t vrsquare1 = (uint32_t) (vr1 * vr1);

scalar-x4.c, in xnn_cs16_vsquareabs_ukernel__scalar_x4():
    30   const int32_t vr1 = (int32_t) input[2];
    40   const uint32_t vrsquare1 = (uint32_t) (vr1 * vr1);

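The cs16 hits are unrelated to sigmoid: these kernels compute the squared magnitude of interleaved complex int16 samples (re0, im0, re1, im1, ...), so vr1 is the real part of the second sample (input[2]) and vrsquare1 its square; the real kernels unroll this pattern 2x/3x/4x. The cast to int32_t before multiplying keeps the int16*int16 product from overflowing, and since re^2 + im^2 can reach 2^31 for inputs of -32768, the sum needs uint32_t. A sketch of the per-sample pattern; the loop and function name are illustrative, not the XNNPACK entry point:

    #include <stddef.h>
    #include <stdint.h>

    /* |z|^2 = re^2 + im^2 for n interleaved complex int16 samples.
       Hypothetical name; the generated kernels unroll this loop. */
    static void squareabs_cs16_scalar(size_t n, const int16_t* input, uint32_t* output) {
      for (size_t i = 0; i < n; i++) {
        const int32_t vr = (int32_t) input[2 * i];      /* widen before multiply */
        const int32_t vi = (int32_t) input[2 * i + 1];
        const uint32_t vrsquare = (uint32_t) (vr * vr); /* <= 2^30 */
        const uint32_t visquare = (uint32_t) (vi * vi);
        output[i] = vrsquare + visquare;                /* <= 2^31, fits uint32_t */
      }
    }
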
/external/XNNPACK/src/f16-vsigmoid/gen/

vsigmoid-neonfp16arith-rr2-p2-nr1fma-x16.c, in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x16():
    72   float16x8_t vr1 = vrecpeq_f16(vd1);
    75   const float16x8_t vadj1 = vfmsq_f16(vone, vr1, vd1);
    78   vr1 = vfmaq_f16(vr1, vr1, vadj1);
    81   float16x8_t vf1 = vmulq_f16(ve1, vr1);

vsigmoid-neonfp16arith-rr2-p2-nr1fma-x24.c, in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x24():
    83   float16x8_t vr1 = vrecpeq_f16(vd1);
    87   const float16x8_t vadj1 = vfmsq_f16(vone, vr1, vd1);
    91   vr1 = vfmaq_f16(vr1, vr1, vadj1);
    95   float16x8_t vf1 = vmulq_f16(ve1, vr1);

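The f16 kernels apply the same reciprocal trick with NEON FP16 arithmetic: vrecpeq_f16 returns a rough estimate of 1/d (about 8 bits), and one fused step, adj = 1 - r*d followed by r <- r + r*adj, already exceeds half precision's 11-bit significand. A standalone sketch, assuming an ARMv8.2-A toolchain with the FP16 extension enabled; the function name is illustrative:

    #include <arm_neon.h>

    /* One Newton-Raphson step on the FP16 reciprocal estimate, as in the
       nr1fma kernels above. Hypothetical name; needs +fp16 at compile time. */
    static inline float16x8_t nr_reciprocal_f16(float16x8_t vd) {
      const float16x8_t vone = vdupq_n_f16((float16_t) 1.0f);
      float16x8_t vr = vrecpeq_f16(vd);                  /* ~8-bit estimate */
      const float16x8_t vadj = vfmsq_f16(vone, vr, vd);  /* 1 - r*d */
      vr = vfmaq_f16(vr, vr, vadj);                      /* r += r*(1 - r*d) */
      return vr;
    }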