/external/XNNPACK/src/f32-sigmoid/gen/
D | avx2-rr1-p5-nr2fma-x16.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16(), vd1 is a local:
     79  const __m256 vd1 = _mm256_add_ps(ve1, vone);
     82  __m256 vr1 = _mm256_rcp_ps(vd1);
     85  vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
     88  vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | scalar-lut2048-p1-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2(), vd1 is a local:
     74  const float vd1 = vy1 + vone;
     77  float vf1 = vy1 / vd1;
|
D | avx2-rr1-p5-nr2fma-x24.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24(), vd1 is a local:
     91  const __m256 vd1 = _mm256_add_ps(ve1, vone);
     95  __m256 vr1 = _mm256_rcp_ps(vd1);
     99  vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
    103  vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | avx-rr2-p5-nr2-x16.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16(), vd1 is a local:
     88  const __m256 vd1 = _mm256_add_ps(ve1, vone);
     91  __m256 vr1 = _mm256_rcp_ps(vd1);
     95  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
     96  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x32.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32(), vd1 is a local:
     76  const __m512 vd1 = _mm512_add_ps(ve1, vone);
     79  __m512 vr1 = _mm512_rcp14_ps(vd1);
     82  vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | avx512f-rr1-p5-scalef-nr1fma-x32.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32(), vd1 is a local:
     73  const __m512 vd1 = _mm512_add_ps(ve1, vone);
     76  __m512 vr1 = _mm512_rcp14_ps(vd1);
     79  vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x32.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32(), vd1 is a local:
     82  const __m512 vd1 = _mm512_add_ps(ve1, vone);
     85  __m512 vr1 = _mm512_rcp14_ps(vd1);
     88  vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | scalar-p5-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2(), vd1 is a local:
     81  const float vd1 = ve1 + vone;
     84  float vf1 = ve1 / vd1;
|
D | scalar-lut64-p2-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2(), vd1 is a local:
     77  const float vd1 = vy1 + vone;
     80  float vf1 = vy1 / vd1;
|
D | avx2-rr1-p5-nr1fma-x16.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16(), vd1 is a local:
     79  const __m256 vd1 = _mm256_add_ps(ve1, vone);
     82  __m256 vr1 = _mm256_rcp_ps(vd1);
     85  vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | avx2-rr1-p5-nr2fma-x32.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32(), vd1 is a local:
    103  const __m256 vd1 = _mm256_add_ps(ve1, vone);
    108  __m256 vr1 = _mm256_rcp_ps(vd1);
    113  vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
    118  vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | avx-rr2-p5-nr2-x24.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24(), vd1 is a local:
    103  const __m256 vd1 = _mm256_add_ps(ve1, vone);
    107  __m256 vr1 = _mm256_rcp_ps(vd1);
    112  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
    113  vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1)));
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x48.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48(), vd1 is a local:
     93  const __m512 vd1 = _mm512_add_ps(ve1, vone);
     97  __m512 vr1 = _mm512_rcp14_ps(vd1);
    101  vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | avx512f-rr1-p5-scalef-nr1fma-x48.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48(), vd1 is a local:
     84  const __m512 vd1 = _mm512_add_ps(ve1, vone);
     88  __m512 vr1 = _mm512_rcp14_ps(vd1);
     92  vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x48.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48(), vd1 is a local:
     87  const __m512 vd1 = _mm512_add_ps(ve1, vone);
     91  __m512 vr1 = _mm512_rcp14_ps(vd1);
     95  vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | avx2-rr1-p5-nr1fma-x24.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24(), vd1 is a local:
     91  const __m256 vd1 = _mm256_add_ps(ve1, vone);
     95  __m256 vr1 = _mm256_rcp_ps(vd1);
     99  vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | avx512f-rr1-p5-scalef-nr1fma-x64.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64(), vd1 is a local:
     95  const __m512 vd1 = _mm512_add_ps(ve1, vone);
    100  __m512 vr1 = _mm512_rcp14_ps(vd1);
    105  vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | avx2-rr1-p5-nr2fma-x40.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40(), vd1 is a local:
    115  const __m256 vd1 = _mm256_add_ps(ve1, vone);
    121  __m256 vr1 = _mm256_rcp_ps(vd1);
    127  vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
    133  vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1);
|
D | scalar-lut2048-p1-div-x4.c | in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4(), vd1 is a local:
     96  const float vd1 = vy1 + vone;
    101  float vf1 = vy1 / vd1;
|
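All of the f32-sigmoid matches above are one idiom: the kernels compute sigmoid as e/(e+1) with e an already range-reduced exponential, the scalar div variants divide directly (vf1 = vy1 / vd1), and the nr* variants replace the divide with a hardware reciprocal estimate refined by Newton-Raphson. A minimal sketch of that refinement follows; the helper names nr_step_fma, nr_step_mul, and sigmoid_tail are illustrative, not XNNPACK identifiers.

#include <immintrin.h>

/* One Newton-Raphson step for r ~= 1/d: r <- r + r*(1 - r*d),
 * written with two FMAs exactly as in the nr1fma/nr2fma matches. */
static inline __m256 nr_step_fma(__m256 vr, __m256 vd, __m256 vone) {
  const __m256 vresidual = _mm256_fnmadd_ps(vr, vd, vone);  /* 1 - r*d */
  return _mm256_fmadd_ps(vresidual, vr, vr);                /* r + r*(1 - r*d) */
}

/* FMA-free form used by the avx-rr2-p5-nr2 kernels: r <- r*(2 - r*d). */
static inline __m256 nr_step_mul(__m256 vr, __m256 vd, __m256 vtwo) {
  return _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
}

/* Division-free tail of the sigmoid kernels: given ve ~= exp(t) for a
 * non-positive reduced input t, return ve / (ve + 1) without a divide. */
static inline __m256 sigmoid_tail(__m256 ve) {
  const __m256 vone = _mm256_set1_ps(1.0f);
  const __m256 vd = _mm256_add_ps(ve, vone);  /* denominator: e + 1 */
  __m256 vr = _mm256_rcp_ps(vd);              /* ~12-bit reciprocal seed */
  vr = nr_step_fma(vr, vd, vone);             /* nr1fma kernels stop here */
  vr = nr_step_fma(vr, vd, vone);             /* nr2fma kernels add a 2nd step */
  return _mm256_mul_ps(ve, vr);               /* sigmoid = e * 1/(e + 1) */
}

The AVX512 kernels seed from the more accurate _mm512_rcp14_ps, which is why the listing shows them with a single nr1fma step, while the coarser _mm256_rcp_ps seed appears with both one (nr1fma) and two (nr2fma) refinement steps.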
/external/llvm/test/CodeGen/PowerPC/ |
D | vsx-minmax.ll
      9  @vd1 = common global <2 x double> zeroinitializer, align 16
     29  store <2 x double> %4, <2 x double>* @vd1, align 16
     86  ;vector double vd1, vd2;
     91  ; vd1 = vec_max(vd, vd);
|
/external/llvm-project/llvm/test/CodeGen/PowerPC/ |
D | vsx-minmax.ll
      9  @vd1 = common global <2 x double> zeroinitializer, align 16
     29  store <2 x double> %4, <2 x double>* @vd1, align 16
     86  ;vector double vd1, vd2;
     91  ; vd1 = vec_max(vd, vd);
|
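The two vsx-minmax.ll hits are the same test carried in both the old llvm and the newer llvm-project checkout; @vd1 only soaks up the result of a vec_max on vector double. Below is a hedged reconstruction of the C the test was reduced from, pieced together from the test's own comments; the declaration of vd and the function name are assumptions.

#include <altivec.h>

/* Globals mirroring the test comment "vector double vd1, vd2;";
 * vd itself is assumed to be declared the same way. */
vector double vd, vd1, vd2;

void test_vd_max(void) {
  /* Per the test comment "vd1 = vec_max(vd, vd);" on a VSX-enabled
   * target, this is the form whose lowering (presumably xvmaxdp)
   * the CodeGen test checks. */
  vd1 = vec_max(vd, vd);
}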
/external/XNNPACK/src/f32-ibilinear/gen/ |
D | scalar-c2.c | in xnn_f32_ibilinear_ukernel__scalar_c2(), vd1 is a local:
     65  const float vd1 = vb1 - vt1;
     68  const float vo1 = vt1 + vd1 * valphav;
|
D | scalar-c4.c | in xnn_f32_ibilinear_ukernel__scalar_c4(), vd1 is a local:
     81  const float vd1 = vb1 - vt1;
     86  const float vo1 = vt1 + vd1 * valphav;
|
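The f32-ibilinear matches are the second level of a textbook two-stage lerp: vt1 and vb1 are the already-interpolated top and bottom edges, vd1 is their difference, and vo1 lerps between them by the vertical weight. A self-contained scalar sketch; the corner and weight parameter names are illustrative, not XNNPACK's.

/* Bilinear sample from four corner texels: two horizontal lerps,
 * then one vertical lerp over their difference (the vd1 above). */
static inline float bilinear(float tl, float tr, float bl, float br,
                             float alphah, float alphav) {
  const float vt = tl + (tr - tl) * alphah;  /* top-edge lerp    */
  const float vb = bl + (br - bl) * alphah;  /* bottom-edge lerp */
  const float vd = vb - vt;                  /* vd1 = vb1 - vt1  */
  return vt + vd * alphav;                   /* vo1 = vt1 + vd1 * valphav */
}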
/external/XNNPACK/src/f32-ibilinear-chw/gen/ |
D | scalar-p2.c | in xnn_f32_ibilinear_chw_ukernel__scalar_p2(), vd1 is a local:
     67  const float vd1 = vb1 - vt1;
     70  const float vo1 = vt1 + vd1 * valphav1;
|
D | scalar-p4.c | in xnn_f32_ibilinear_chw_ukernel__scalar_p4(), vd1 is a local:
     91  const float vd1 = vb1 - vt1;
     96  const float vo1 = vt1 + vd1 * valphav1;
|
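The chw variants evaluate the same expression, but the weight is valphav1 rather than a shared valphav: each output pixel carries its own weight pair. A sketch of that loop shape follows; the interleaved {alphah, alphav} layout, the names, and the signature are assumptions for illustration.

#include <stddef.h>

/* Per-pixel bilinear loop in the style of the chw kernels: output
 * element i reads its own (alphah, alphav) pair instead of one
 * row-wide vertical weight. */
void ibilinear_chw_sketch(size_t n, const float* tl, const float* tr,
                          const float* bl, const float* br,
                          const float* w,  /* n interleaved {alphah, alphav} pairs */
                          float* out) {
  for (size_t i = 0; i < n; i++) {
    const float alphah = w[2 * i + 0];
    const float alphav = w[2 * i + 1];
    const float vt = tl[i] + (tr[i] - tl[i]) * alphah;
    const float vb = bl[i] + (br[i] - bl[i]) * alphah;
    out[i] = vt + (vb - vt) * alphav;  /* vo1 = vt1 + vd1 * valphav1 */
  }
}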