/external/XNNPACK/src/f32-vscaleextexp/gen/
D | avx2-p5-x8.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x8():
     81   __m256 ve0 = _mm256_add_ps(vn0, vscalee);  (local)
     87   ve0 = _mm256_max_ps(ve0, vmin_exponent);
     92   …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve0, vm…

D | avx2-p5-x16.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x16():
     91   __m256 ve0 = _mm256_add_ps(vn0, vscalee);  (local)
     98   ve0 = _mm256_max_ps(ve0, vmin_exponent);
    104   …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve0, vm…

D | avx2-p5-x24.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x24():
    101   __m256 ve0 = _mm256_add_ps(vn0, vscalee);  (local)
    109   ve0 = _mm256_max_ps(ve0, vmin_exponent);
    116   …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve0, vm…

D | avx2-p5-x32.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32():
    111   __m256 ve0 = _mm256_add_ps(vn0, vscalee);  (local)
    120   ve0 = _mm256_max_ps(ve0, vmin_exponent);
    128   …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve0, vm…

D | avx2-p5-x40.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40():
    121   __m256 ve0 = _mm256_add_ps(vn0, vscalee);  (local)
    131   ve0 = _mm256_max_ps(ve0, vmin_exponent);
    140   …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve0, vm…

D | avx2-p5-x48.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48():
    131   __m256 ve0 = _mm256_add_ps(vn0, vscalee);  (local)
    142   ve0 = _mm256_max_ps(ve0, vmin_exponent);
    152   …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve0, vm…

D | avx2-p5-x56.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56():
    141   __m256 ve0 = _mm256_add_ps(vn0, vscalee);  (local)
    153   ve0 = _mm256_max_ps(ve0, vmin_exponent);
    164   …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve0, vm…

D | avx512f-p5-scalef-x16.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16():
     75   const __m512 ve0 = _mm512_add_ps(vn0, vscalee);  (local)
     78   vf0 = _mm512_scalef_ps(vf0, ve0);
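Across the vscaleextexp kernels the pattern is identical: the extended exponent ve0 = vn0 + vscalee is clamped from below against vmin_exponent and then applied as a power of two, either by synthesizing the 2^e factor in the IEEE-754 exponent bits (the AVX2 files, whose line 92 is truncated above) or by handing it straight to _mm512_scalef_ps (the AVX-512 file). A minimal scalar sketch of the bit-level variant; the clamp constant, the function name, and the fp32_from_bits_ helper are illustrative assumptions, not the kernels' actual values:

#include <stdint.h>
#include <string.h>

/* Assumed helper mirroring XNNPACK's fp32_from_bits(). */
static inline float fp32_from_bits_(uint32_t w) {
  float f;
  memcpy(&f, &w, sizeof f);
  return f;
}

/* Compute f * 2^e for an integral-valued float e (assumed <= 127 here):
 * clamp e from below, then build the scale factor 2^e by writing the biased
 * exponent straight into bits 23..30 of a float. */
static float scale_by_pow2(float f, float e) {
  const float min_exponent = -127.0f;       /* mirrors the _mm256_max_ps clamp */
  if (e < min_exponent) e = min_exponent;   /* e <= -127 flushes the scale to zero */
  const uint32_t scale_bits = (uint32_t) ((int32_t) e + 127) << 23;
  return f * fp32_from_bits_(scale_bits);
}

The AVX2 kernels presumably reach the same scale factor with a bias add plus _mm256_slli_epi32 instead of the explicit int conversion used here, while _mm512_scalef_ps performs the multiply-by-2^e in a single instruction.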
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | scalar-p5-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2():
    105   const float ve0 = vt0 * vp0 + vs0;  (local)
    109   float vf0 = ve0 / (ve0 + vone);

D | scalar-p5-div-x4.c | in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4():
    129   const float ve0 = vt0 * vp0 + vs0;  (local)
    135   float vf0 = ve0 / (ve0 + vone);

D | scalar-lut2048-p1-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2():
     79   const uint32_t ve0 = (fp32_to_bits(vn0) & ~vindex_mask) << 12;  (local)
     86   const float vs0 = fp32_from_bits(xnn_table_exp2_k_over_2048[vidx0] + ve0);

D | scalar-lut64-p2-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2():
     79   const uint32_t ve0 = (fp32_to_bits(vn0) & ~vindex_mask) << 17;  (local)
     86   const float vs0 = fp32_from_bits(xnn_table_exp2_k_over_64[vidx0] + ve0);

D | avx2-rr1-p5-div-x16.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16():
    100   const __m256 ve0 = _mm256_fmadd_ps(vt0, vp0, vs0);  (local)
    104   const __m256 vd0 = _mm256_add_ps(ve0, vone);
    108   __m256 vf0 = _mm256_div_ps(ve0, vd0);

D | avx2-rr1-p5-nr1fma-x16.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16():
    100   const __m256 ve0 = _mm256_fmadd_ps(vt0, vp0, vs0);  (local)
    104   const __m256 vd0 = _mm256_add_ps(ve0, vone);
    118   __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx2-rr1-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x24():
    111   const __m256 ve0 = _mm256_fmadd_ps(vt0, vp0, vs0);  (local)
    116   const __m256 vd0 = _mm256_add_ps(ve0, vone);
    121   __m256 vf0 = _mm256_div_ps(ve0, vd0);

D | avx2-rr1-p5-nr2fma-x16.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16():
    100   const __m256 ve0 = _mm256_fmadd_ps(vt0, vp0, vs0);  (local)
    104   const __m256 vd0 = _mm256_add_ps(ve0, vone);
    120   __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx2-rr1-p5-nr1fma-x24.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24():
    111   const __m256 ve0 = _mm256_fmadd_ps(vt0, vp0, vs0);  (local)
    116   const __m256 vd0 = _mm256_add_ps(ve0, vone);
    133   __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | scalar-lut2048-p1-div-x4.c | in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4():
     85   const uint32_t ve0 = (fp32_to_bits(vn0) & ~vindex_mask) << 12;  (local)
     96   const float vs0 = fp32_from_bits(xnn_table_exp2_k_over_2048[vidx0] + ve0);
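In the sigmoid kernels ve0 plays two unrelated roles. In the p5 variants it is the reconstructed exponential e = exp(t) of a non-positive reduced argument, and the sigmoid is finished as e / (e + 1): the *-div-* kernels divide directly, while the *-nr1fma-*/*-nr2fma-* kernels multiply by a reciprocal of d = e + 1 refined with one or two Newton-Raphson steps (vr0 in the listing). In the lut2048/lut64 variants ve0 is instead the exponent-field half of a table-based 2^n reconstruction (see the sketch after the f32-raddstoreexpminusmax listing below). A minimal scalar sketch of the two finishing strategies, assuming e has already been computed; the function names are illustrative, and the initial estimate stands in for what the vector kernels would get from an approximate-reciprocal instruction:

/* sigmoid = e / (e + 1), with e = exp(t) for a non-positive t, finished two ways. */

/* Direct division, as in the *-div-* kernels. */
static float sigmoid_finish_div(float e) {
  const float d = e + 1.0f;
  return e / d;
}

/* Newton-Raphson reciprocal, as in the *-nr1fma-* / *-nr2fma-* kernels:
 * r <- r + r * (1 - d*r) converges to 1/d; one step here, two in nr2fma. */
static float sigmoid_finish_nr(float e, float r /* rough estimate of 1/(e + 1) */) {
  const float d = e + 1.0f;
  r = r + r * (1.0f - d * r);   /* one refinement step (an FMA pair in the kernels) */
  return e * r;
}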
/external/llvm/test/CodeGen/AMDGPU/ |
D | undefined-subreg-liverange.ll |
     23   %ve0 = extractelement <4 x float> %v3, i32 0
     24   store float %ve0, float addrspace(3)* undef, align 4

D | rename-disconnected-bug.ll |
     30   %ve0 = extractelement <4 x float> %v3, i32 0
     31   store float %ve0, float addrspace(3)* undef, align 4
/external/swiftshader/third_party/llvm-7.0/llvm/test/CodeGen/AMDGPU/ |
D | rename-disconnected-bug.ll |
     30   %ve0 = extractelement <4 x float> %v3, i32 0
     31   store float %ve0, float addrspace(3)* undef, align 4
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | scalar-lut64-p2-x2-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2():
     74   const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17;  (local)
     81   const float vs0 = fp32_from_bits(xnn_table_exp2_k_over_64[vidx0] + ve0);

D | scalar-lut64-p2-x2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2():
     73   const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17;  (local)
     80   const float vs0 = fp32_from_bits(xnn_table_exp2_k_over_64[vidx0] + ve0);

D | scalar-lut64-p2-x4-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2():
     80   const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17;  (local)
     91   const float vs0 = fp32_from_bits(xnn_table_exp2_k_over_64[vidx0] + ve0);

D | scalar-lut64-p2-x4.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4():
     79   const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17;  (local)
     90   const float vs0 = fp32_from_bits(xnn_table_exp2_k_over_64[vidx0] + ve0);
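Here, as in the lut64/lut2048 sigmoid kernels above, ve0 is the exponent-field contribution of a table-based 2^n reconstruction: vn0 carries an integer in its low mantissa bits (courtesy of a magic bias), the low 6 bits index a 64-entry table of 2^(k/64) bit patterns, and the bits kept by the 0xFFFFFFC0 mask are shifted by 17 = 23 - 6 positions so that adding them to the table entry bumps the IEEE-754 exponent by the integer part (the lut2048 variants use 2^11 entries and a shift of 12 = 23 - 11). A minimal sketch of why table[k] + (i << 23) is exactly 2^(i + k/64); the table stand-in and helpers are assumptions, not XNNPACK's actual xnn_table_exp2_k_over_64:

#include <math.h>
#include <stdint.h>
#include <string.h>

static inline float from_bits(uint32_t w) { float f; memcpy(&f, &w, sizeof f); return f; }
static inline uint32_t to_bits(float f) { uint32_t w; memcpy(&w, &f, sizeof w); return w; }

/* Stand-in for the 64-entry table: bit patterns of 2^(k/64), k = 0..63.
 * Each entry is a float in [1, 2), so its exponent field holds the bias 127. */
static uint32_t table_exp2_k_over_64(uint32_t k) {
  return to_bits(exp2f((float) k / 64.0f));
}

/* 2^(i + k/64) for integer i and k in 0..63: adding i << 23 to the table entry
 * only touches the exponent field, multiplying 2^(k/64) by 2^i (as long as the
 * result stays a normal float). In the kernels above, both pieces come from the
 * bits of the magic-biased vn0: k via the low-6-bit index, i via ve0's << 17. */
static float exp2_i_plus_k64(int32_t i, uint32_t k) {
  return from_bits(table_exp2_k_over_64(k) + ((uint32_t) i << 23));
}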