; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s

; Floating-point vector arithmetic selection test for MIPS with the msa and
; fp64 attributes enabled, exercised for both -march=mips and -march=mipsel.
;
; Every function loads its vector operand(s) through pointer arguments,
; applies one IR operation (or intrinsic) and stores the result through the
; first pointer argument. The FileCheck directives expect the pointer
; arguments in $4-$7 and the vector values in $w registers. Each operation is
; tested once for <4 x float> (.w instructions) and once for <2 x double>
; (.d instructions).

; fadd on <4 x float> is expected to select fadd.w.
define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: add_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <4 x float> %1, %2
  ; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4f32
}

; fadd on <2 x double> is expected to select fadd.d.
define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: add_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <2 x double> %1, %2
  ; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2f64
}

; fsub on <4 x float> is expected to select fsub.w.
define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: sub_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <4 x float> %1, %2
  ; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4f32
}

; fsub on <2 x double> is expected to select fsub.d.
define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: sub_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <2 x double> %1, %2
  ; CHECK-DAG: fsub.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2f64
}

; fmul on <4 x float> is expected to select fmul.w.
define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: mul_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <4 x float> %1, %2
  ; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v4f32
}

; fmul on <2 x double> is expected to select fmul.d.
define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: mul_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <2 x double> %1, %2
  ; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v2f64
}

; The llvm.fma intrinsic on <4 x float> is expected to select fmadd.w, with
; the register holding the first operand also receiving the result.
define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                       <4 x float>* %c) nounwind {
  ; CHECK: fma_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>, <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <4 x float> @llvm.fma.v4f32 (<4 x float> %1, <4 x float> %2,
                                              <4 x float> %3)
  ; CHECK-DAG: fmadd.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %4, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v4f32
}

; The llvm.fma intrinsic on <2 x double> is expected to select fmadd.d, with
; the register holding the first operand also receiving the result.
define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                       <2 x double>* %c) nounwind {
  ; CHECK: fma_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>, <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <2 x double> @llvm.fma.v2f64 (<2 x double> %1, <2 x double> %2,
                                               <2 x double> %3)
  ; CHECK-DAG: fmadd.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %4, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v2f64
}

; A separate fmul feeding an fsub (%a - %b * %c) on <4 x float> is expected to
; be combined into a single fmsub.w.
define void @fmsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                         <4 x float>* %c) nounwind {
  ; CHECK: fmsub_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>, <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = fmul <4 x float> %2, %3
  %5 = fsub <4 x float> %1, %4
  ; CHECK-DAG: fmsub.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %5, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v4f32
}

; A separate fmul feeding an fsub (%a - %b * %c) on <2 x double> is expected
; to be combined into a single fmsub.d.
define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                         <2 x double>* %c) nounwind {
  ; CHECK: fmsub_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>, <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = fmul <2 x double> %2, %3
  %5 = fsub <2 x double> %1, %4
  ; CHECK-DAG: fmsub.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %5, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v2f64
}

; fdiv on <4 x float> is expected to select fdiv.w.
define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: fdiv_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <4 x float> %1, %2
  ; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v4f32
}

; fdiv on <2 x double> is expected to select fdiv.d.
define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: fdiv_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <2 x double> %1, %2
  ; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v2f64
}

; The llvm.fabs intrinsic on <4 x float> is expected to select fmax_a.w with
; the loaded value supplied as both source operands.
define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fabs_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.fabs.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v4f32
}

; The llvm.fabs intrinsic on <2 x double> is expected to select fmax_a.d with
; the loaded value supplied as both source operands.
define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fabs_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.fabs.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v2f64
}

; The llvm.exp2 intrinsic on <4 x float> is expected to produce an ldi.w of 1,
; an ffint_u.w of that splat, and an fexp2.w taking the converted splat and
; the loaded value.
define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  ; Each step gets its own variable (R3 -> R4 -> R5) so the pattern follows the
  ; data flow and does not depend on the register allocator reusing one
  ; register for the ldi.w and ffint_u.w results.
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32
}

; The llvm.exp2 intrinsic on <2 x double> is expected to produce an ldi.d of
; 1, an ffint_u.d of that splat, and an fexp2.d taking the converted splat and
; the loaded value.
define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  ; Each step gets its own variable (R3 -> R4 -> R5) so the pattern follows the
  ; data flow and does not depend on the register allocator reusing one
  ; register for the ldi.d and ffint_u.d results.
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.d [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64
}

; llvm.exp2 on <4 x float> whose result is multiplied by a splat of 2.0. The
; directives look for the same ldi.w / ffint_u.w / fexp2.w sequence and expect
; the fexp2.w result to be stored directly.
; NOTE(review): no separate multiply is matched and the splat immediate is
; still 1 - confirm this is the intended folding.
define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32_2:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  %3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32_2
}

; llvm.exp2 on <2 x double> whose result is multiplied by a splat of 2.0. The
; directives look for the same ldi.d / ffint_u.d / fexp2.d sequence and expect
; the fexp2.d result to be stored directly.
; NOTE(review): no separate multiply is matched and the splat immediate is
; still 1 - confirm this is the intended folding.
define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64_2:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  %3 = fmul <2 x double> <double 2.0, double 2.0>, %2
  ; CHECK-DAG: ldi.d [[R2:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R2]]
  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64_2
}

; The llvm.sqrt intrinsic on <4 x float> is expected to select fsqrt.w.
define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fsqrt_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v4f32
}

; The llvm.sqrt intrinsic on <2 x double> is expected to select fsqrt.d.
define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fsqrt_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v2f64
}

; uitofp from <4 x i32> to <4 x float> is expected to select ffint_u.w.
define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_u_v4f32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v4f32
}

; uitofp from <2 x i64> to <2 x double> is expected to select ffint_u.d.
define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_u_v2f64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v2f64
}

; sitofp from <4 x i32> to <4 x float> is expected to select ffint_s.w.
define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_s_v4f32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v4f32
}

; sitofp from <2 x i64> to <2 x double> is expected to select ffint_s.d.
define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_s_v2f64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v2f64
}

; fptoui from <4 x float> to <4 x i32> is expected to select ftrunc_u.w.
define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_u_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v4f32
}

; fptoui from <2 x double> to <2 x i64> is expected to select ftrunc_u.d.
define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_u_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v2f64
}

; fptosi from <4 x float> to <4 x i32> is expected to select ftrunc_s.w.
define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_s_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v4f32
}

; fptosi from <2 x double> to <2 x i64> is expected to select ftrunc_s.d.
define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_s_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v2f64
}

; Declarations of the vector intrinsics called by the functions above.
declare <4 x float>  @llvm.fabs.v4f32(<4 x float>  %Val)
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %Val)
declare <4 x float>  @llvm.exp2.v4f32(<4 x float>  %val)
declare <2 x double> @llvm.exp2.v2f64(<2 x double> %val)
declare <4 x float>  @llvm.fma.v4f32(<4 x float>  %a, <4 x float>  %b,
                                     <4 x float>  %c)
declare <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b,
                                     <2 x double> %c)
declare <4 x float>  @llvm.sqrt.v4f32(<4 x float>  %Val)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val)