/external/llvm-project/llvm/test/CodeGen/PowerPC/ |
D | vec_conv_fp32_to_i64_elts.ll |
    180: ; CHECK-P8-NEXT: xxmrglw vs5, v4, v4
    190: ; CHECK-P8-NEXT: xvcvspdp vs5, vs5
    198: ; CHECK-P8-NEXT: xvcvdpuxds v5, vs5
    215: ; CHECK-P8-NEXT: xxswapd vs5, v6
    222: ; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
    230: ; CHECK-P9-NEXT: lxv vs5, 32(r4)
    235: ; CHECK-P9-NEXT: xxmrglw vs6, vs5, vs5
    236: ; CHECK-P9-NEXT: xxmrghw vs5, vs5, vs5
    244: ; CHECK-P9-NEXT: xvcvspdp vs5, vs5
    252: ; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
    [all …]
|
D | vec_conv_i32_to_fp64_elts.ll |
    173: ; CHECK-P8-NEXT: xvcvuxwdp vs5, v0
    183: ; CHECK-P8-NEXT: xxswapd vs1, vs5
    185: ; CHECK-P8-NEXT: xxswapd vs5, vs6
    193: ; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
    200: ; CHECK-P9-NEXT: lxv vs5, 32(r4)
    212: ; CHECK-P9-NEXT: xxmrglw v2, vs5, vs5
    214: ; CHECK-P9-NEXT: xxmrghw v2, vs5, vs5
    217: ; CHECK-P9-NEXT: xvcvuxwdp vs5, v2
    222: ; CHECK-P9-NEXT: stxv vs5, 80(r3)
    232: ; CHECK-BE-NEXT: lxv vs5, 32(r4)
    [all …]
|
D | vec_conv_fp_to_i_8byte_elts.ll |
    143: ; CHECK-P8-NEXT: lxvd2x vs5, r4, r10
    151: ; CHECK-P8-NEXT: xvcvdpuxds vs5, vs5
    156: ; CHECK-P8-NEXT: stxvd2x vs5, r3, r10
    172: ; CHECK-P9-NEXT: lxv vs5, 32(r4)
    177: ; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
    187: ; CHECK-P9-NEXT: stxv vs5, 32(r3)
    200: ; CHECK-BE-NEXT: lxv vs5, 32(r4)
    205: ; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
    215: ; CHECK-BE-NEXT: stxv vs5, 32(r3)
    357: ; CHECK-P8-NEXT: lxvd2x vs5, r4, r10
    [all …]
|
D | vec_conv_fp64_to_i32_elts.ll |
    188: ; CHECK-P8-NEXT: lxvd2x vs5, r4, r8
    197: ; CHECK-P8-NEXT: xxswapd vs5, vs5
    204: ; CHECK-P8-NEXT: xxmrgld vs4, vs6, vs5
    207: ; CHECK-P8-NEXT: xxmrghd vs0, vs6, vs5
    208: ; CHECK-P8-NEXT: xxmrgld vs5, vs2, vs1
    214: ; CHECK-P8-NEXT: xvcvdpuxws v6, vs5
    231: ; CHECK-P9-NEXT: lxv vs5, 48(r4)
    234: ; CHECK-P9-NEXT: xxmrgld vs7, vs5, vs4
    235: ; CHECK-P9-NEXT: xxmrghd vs4, vs5, vs4
    267: ; CHECK-BE-NEXT: lxv vs5, 32(r4)
    [all …]
|
D | vec_conv_i_to_fp_8byte_elts.ll |
    143: ; CHECK-P8-NEXT: lxvd2x vs5, r4, r10
    151: ; CHECK-P8-NEXT: xvcvuxddp vs5, vs5
    156: ; CHECK-P8-NEXT: stxvd2x vs5, r3, r10
    179: ; CHECK-P9-NEXT: xvcvuxddp vs5, v4
    184: ; CHECK-P9-NEXT: stxv vs5, 80(r3)
    207: ; CHECK-BE-NEXT: xvcvuxddp vs5, v4
    212: ; CHECK-BE-NEXT: stxv vs5, 80(r3)
    357: ; CHECK-P8-NEXT: lxvd2x vs5, r4, r10
    365: ; CHECK-P8-NEXT: xvcvsxddp vs5, vs5
    370: ; CHECK-P8-NEXT: stxvd2x vs5, r3, r10
    [all …]
|
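The PowerPC hits above are FileCheck lines from vector element-conversion CodeGen tests; vs5 there is simply one of the VSX scratch registers the register allocator happens to pick. As a rough orientation only, the C below sketches the kind of widening conversions these tests exercise (the actual test inputs are LLVM IR functions; the loop shapes and function names here are assumptions for illustration):

    #include <stdint.h>
    #include <stddef.h>

    /* Hypothetical illustration only -- not the test contents.  A widening
     * float -> u64 element conversion like the one vec_conv_fp32_to_i64_elts.ll
     * checks: on P8/P9 the backend goes through xvcvspdp (single -> double)
     * and then xvcvdpuxds (double -> unsigned doubleword), the instruction
     * pair matched by the CHECK lines above. */
    void fp32_to_u64_elts(const float *in, uint64_t *out, size_t n) {
      for (size_t i = 0; i < n; i++) {
        out[i] = (uint64_t)in[i];
      }
    }

    /* The widening integer -> double direction covered by
     * vec_conv_i32_to_fp64_elts.ll: unsigned 32-bit words converted to
     * doubles, lowered through xvcvuxwdp. */
    void u32_to_fp64_elts(const uint32_t *in, double *out, size_t n) {
      for (size_t i = 0; i < n; i++) {
        out[i] = (double)in[i];
      }
    }

The xxmrglw/xxmrghw and xxswapd shuffles in the checks are the lane rearrangements needed when widening or narrowing between 4 x 32-bit and 2 x 64-bit element layouts.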
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-scalar-rr2-lut16-p3-x6.c |  in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6():
    96: float vs5 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx5] + ven5);  (local)
    125: vs5 = 0.0f;
    153: vt5 *= vs5;
    154: vs5 -= vone;
    173: const float ve5 = (vp5 + vs5) * valpha;
|
D | velu-scalar-rr2-p6-x6.c |  in xnn_f32_velu_ukernel__scalar_rr2_p6_x6():
    76: float vs5 = fp32_from_bits(fp32_to_bits(vn5) << 23);  (local)
    114: vs5 = 0.0f;
    163: vt5 *= vs5;
    164: vs5 -= vone;
    183: const float ve5 = (vp5 + vs5) * valpha;
|
D | velu-wasm-rr2-p6-x6.c |  in xnn_f32_velu_ukernel__wasm_rr2_p6_x6():
    76: float vs5 = fp32_from_bits(fp32_to_bits(vn5) << 23);  (local)
    139: vt5 *= vs5;
    140: vs5 -= vone;
    159: const float ve5 = (vp5 + vs5) * valpha;
|
D | velu-wasm-rr2-lut16-p3-x6.c |  in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6():
    96: float vs5 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx5] + ven5);  (local)
    129: vt5 *= vs5;
    130: vs5 -= vone;
    149: const float ve5 = (vp5 + vs5) * valpha;
|
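In these XNNPACK ELU micro-kernels, vs5 is the sixth unrolled lane's scale factor s = 2^n from the exp decomposition e^x ≈ 2^n · p(t). A minimal scalar sketch of how the p6 variants form that scale with the magic-bias trick, assuming the standard constants (names here are illustrative, not the kernel's exact code):

    #include <stdint.h>
    #include <string.h>

    /* Bit-cast helpers in the spirit of fp32_to_bits / fp32_from_bits. */
    static inline uint32_t f32_to_bits(float x)   { uint32_t u; memcpy(&u, &x, sizeof u); return u; }
    static inline float    f32_from_bits(uint32_t u) { float x; memcpy(&x, &u, sizeof x); return x; }

    /* Illustrative sketch of the scale computation.  vn = x*log2(e) + magic_bias
     * rounds n = round(x / ln 2) into the low mantissa bits of vn, already offset
     * by the 127 exponent bias, so shifting the bit pattern left by 23 places
     * n + 127 into the exponent field and yields the float 2^n -- the same idea
     * as "vs5 = fp32_from_bits(fp32_to_bits(vn5) << 23)" in the p6 hits above. */
    static float exp2_scale(float x) {
      const float vmagic_bias = 0x1.8000FEp23f;   /* 1.5 * 2^23 + 127 */
      const float vlog2e = 0x1.715476p+0f;
      const float vn = x * vlog2e + vmagic_bias;
      return f32_from_bits(f32_to_bits(vn) << 23);
    }

The lut16-p3 variants split n instead: the low bits index xnn_table_exp2minus_k_over_16 and the remaining bits (ven5) are added into the exponent field of the table entry, which is what the vidx5/ven5 definition lines above do. The vs5 = 0.0f assignments in the scalar kernels are the flush for inputs so negative that 2^n underflows.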
D | velu-avx2-rr1-lut16-p3-gather-x48.c |  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48():
    101: __m256 vs5 = _mm256_castsi256_ps(_mm256_add_epi32(vl5, ven5));  (local)
    122: vt5 = _mm256_mul_ps(vt5, vs5);
    134: vs5 = _mm256_fmsub_ps(vs5, valpha, valpha);
    147: const __m256 ve5 = _mm256_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx512f-rr1-lut16-p3-perm-x96.c |  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96():
    88: __m512 vs5 = _mm512_castsi512_ps(_mm512_add_epi32(vl5, ven5));  (local)
    116: vt5 = _mm512_mul_ps(vt5, vs5);
    123: vs5 = _mm512_fmsub_ps(vs5, valpha, valpha);
    143: __m512 vy5 = _mm512_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx2-rr1-lut8-p4-perm-x48.c |  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48():
    94: __m256 vs5 = _mm256_castsi256_ps(_mm256_add_epi32(vl5, ven5));  (local)
    122: vt5 = _mm256_mul_ps(vt5, vs5);
    134: vs5 = _mm256_fmsub_ps(vs5, valpha, valpha);
    147: const __m256 ve5 = _mm256_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx512f-rr1-p6-x96.c |  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96():
    75: __m512 vs5 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn5), 23));  (local)
    124: vt5 = _mm512_mul_ps(vt5, vs5);
    131: vs5 = _mm512_fmsub_ps(vs5, valpha, valpha);
    151: __m512 vy5 = _mm512_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx2-rr1-lut4-p4-perm-x48.c |  in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48():
    95: __m256 vs5 = _mm256_castsi256_ps(_mm256_add_epi32(vl5, ven5));  (local)
    123: vt5 = _mm256_mul_ps(vt5, vs5);
    135: vs5 = _mm256_fmsub_ps(vs5, valpha, valpha);
    148: const __m256 ve5 = _mm256_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx2-rr1-p6-x48.c |  in xnn_f32_velu_ukernel__avx2_rr1_p6_x48():
    75: __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));  (local)
    124: vt5 = _mm256_mul_ps(vt5, vs5);
    136: vs5 = _mm256_fmsub_ps(vs5, valpha, valpha);
    149: const __m256 ve5 = _mm256_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx512f-rr1-lut16-p3-perm-x112.c |  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112():
    93: __m512 vs5 = _mm512_castsi512_ps(_mm512_add_epi32(vl5, ven5));  (local)
    125: vt5 = _mm512_mul_ps(vt5, vs5);
    134: vs5 = _mm512_fmsub_ps(vs5, valpha, valpha);
    156: __m512 vy5 = _mm512_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx512f-rr1-p6-x112.c |  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112():
    78: __m512 vs5 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn5), 23));  (local)
    134: vt5 = _mm512_mul_ps(vt5, vs5);
    143: vs5 = _mm512_fmsub_ps(vs5, valpha, valpha);
    165: __m512 vy5 = _mm512_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx512f-rr1-lut16-p3-perm-x128.c |  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128():
    98: __m512 vs5 = _mm512_castsi512_ps(_mm512_add_epi32(vl5, ven5));  (local)
    134: vt5 = _mm512_mul_ps(vt5, vs5);
    145: vs5 = _mm512_fmsub_ps(vs5, valpha, valpha);
    169: __m512 vy5 = _mm512_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx2-rr1-lut4-p4-perm-x56.c |  in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56():
    101: __m256 vs5 = _mm256_castsi256_ps(_mm256_add_epi32(vl5, ven5));  (local)
    133: vt5 = _mm256_mul_ps(vt5, vs5);
    147: vs5 = _mm256_fmsub_ps(vs5, valpha, valpha);
    162: const __m256 ve5 = _mm256_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx2-rr1-lut8-p4-perm-x56.c |  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56():
    100: __m256 vs5 = _mm256_castsi256_ps(_mm256_add_epi32(vl5, ven5));  (local)
    132: vt5 = _mm256_mul_ps(vt5, vs5);
    146: vs5 = _mm256_fmsub_ps(vs5, valpha, valpha);
    161: const __m256 ve5 = _mm256_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx2-rr1-lut16-p3-gather-x56.c |  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56():
    108: __m256 vs5 = _mm256_castsi256_ps(_mm256_add_epi32(vl5, ven5));  (local)
    132: vt5 = _mm256_mul_ps(vt5, vs5);
    146: vs5 = _mm256_fmsub_ps(vs5, valpha, valpha);
    161: const __m256 ve5 = _mm256_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx2-rr1-p6-x56.c |  in xnn_f32_velu_ukernel__avx2_rr1_p6_x56():
    78: __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));  (local)
    134: vt5 = _mm256_mul_ps(vt5, vs5);
    148: vs5 = _mm256_fmsub_ps(vs5, valpha, valpha);
    163: const __m256 ve5 = _mm256_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx2-rr1-p6-x64.c |  in xnn_f32_velu_ukernel__avx2_rr1_p6_x64():
    81: __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));  (local)
    144: vt5 = _mm256_mul_ps(vt5, vs5);
    160: vs5 = _mm256_fmsub_ps(vs5, valpha, valpha);
    177: const __m256 ve5 = _mm256_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx512f-rr1-p6-x128.c |  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128():
    81: __m512 vs5 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn5), 23));  (local)
    144: vt5 = _mm512_mul_ps(vt5, vs5);
    155: vs5 = _mm512_fmsub_ps(vs5, valpha, valpha);
    179: __m512 vy5 = _mm512_fmadd_ps(vp5, valpha, vs5);
|
D | velu-avx2-rr1-lut4-p4-perm-x64.c |  in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64():
    107: __m256 vs5 = _mm256_castsi256_ps(_mm256_add_epi32(vl5, ven5));  (local)
    143: vt5 = _mm256_mul_ps(vt5, vs5);
    159: vs5 = _mm256_fmsub_ps(vs5, valpha, valpha);
    176: const __m256 ve5 = _mm256_fmadd_ps(vp5, valpha, vs5);
|
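Across the AVX2/AVX512 variants above, vs5 then feeds the negative-branch ELU result alpha*(e^x - 1): the alpha factor is folded into the scale with one FMS and combined with the scaled polynomial by one FMA, which is the fmsub/fmadd pair on vs5 matched in every hit list. A minimal single-vector AVX2 sketch of that tail; the step folding t back into the polynomial is not shown in the hits and is assumed here from the usual rr1/rr2 kernel structure, and the real kernels also unroll over 6 to 8 vectors, blend with x for non-negative inputs, and handle saturation:

    #include <immintrin.h>

    /* Illustrative only.  vp is the evaluated polynomial p(t), vt the reduced
     * argument t, vs the scale 2^n, valpha the ELU alpha.  Returns
     * alpha * (s * (1 + t*(1 + p)) - 1) ~= alpha * (e^x - 1). */
    static inline __m256 elu_negative_tail(__m256 vp, __m256 vt, __m256 vs, __m256 valpha) {
      vt = _mm256_mul_ps(vt, vs);                /* vt5 = vt5 * vs5           */
      vs = _mm256_fmsub_ps(vs, valpha, valpha);  /* vs5 = vs5*alpha - alpha   */
      vp = _mm256_fmadd_ps(vp, vt, vt);          /* fold t back in: p*t + t   */
      return _mm256_fmadd_ps(vp, valpha, vs);    /* ve5 = vp5*alpha + vs5     */
    }

Folding alpha into vs ahead of time avoids a separate multiply by alpha on the final sum, which is why these vector kernels use an fmsub on vs5 where the scalar kernels simply do vs5 -= vone and multiply by valpha at the end.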