; RUN: llc -march=mips -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s

; Codegen test for MSA floating-point vector arithmetic on <4 x float> and
; <2 x double>, run for both big- and little-endian MIPS32r2.
;
; Every function loads its vector operands through pointer arguments, applies
; one operation, and stores the result through the first pointer argument.
; The patterns expect the pointer arguments in $4, $5, $6, $7 in declaration
; order, so the destination pointer is always $4.

; fadd on <4 x float> is expected to select fadd.w.
define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: add_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <4 x float> %1, %2
  ; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4f32
}

; fadd on <2 x double> is expected to select fadd.d.
define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: add_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <2 x double> %1, %2
  ; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2f64
}

; fsub on <4 x float> is expected to select fsub.w.
define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: sub_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <4 x float> %1, %2
  ; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4f32
}

; fsub on <2 x double> is expected to select fsub.d.
define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: sub_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <2 x double> %1, %2
  ; CHECK-DAG: fsub.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2f64
}

; fmul on <4 x float> is expected to select fmul.w.
define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: mul_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <4 x float> %1, %2
  ; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v4f32
}

; fmul on <2 x double> is expected to select fmul.d.
define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: mul_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <2 x double> %1, %2
  ; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v2f64
}

; llvm.fma on <4 x float> is expected to select fmadd.w, accumulating into the
; register that holds %a.
define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                       <4 x float>* %c) nounwind {
  ; CHECK: fma_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>, <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <4 x float> @llvm.fma.v4f32 (<4 x float> %1, <4 x float> %2,
                                              <4 x float> %3)
  ; CHECK-DAG: fmadd.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %4, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v4f32
}

; llvm.fma on <2 x double> is expected to select fmadd.d, accumulating into the
; register that holds %a.
define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                       <2 x double>* %c) nounwind {
  ; CHECK: fma_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>, <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <2 x double> @llvm.fma.v2f64 (<2 x double> %1, <2 x double> %2,
                                               <2 x double> %3)
  ; CHECK-DAG: fmadd.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %4, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v2f64
}

; %a - (%b * %c) on <4 x float>: the patterns expect a separate fmul.w and
; fsub.w, with the product and the difference reusing the register loaded
; from %c.
define void @fmul_fsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                             <4 x float>* %c) nounwind {
  ; CHECK: fmul_fsub_v4f32:

  %1 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($6)
  %2 = load <4 x float>, <4 x float>* %c
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($7)
  %3 = fmul <4 x float> %1, %2
  ; CHECK-DAG: fmul.w [[R2]], [[R1]], [[R2]]
  %4 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($5)
  %5 = fsub <4 x float> %4, %3
  ; CHECK-DAG: fsub.w [[R2]], [[R3]], [[R2]]
  store <4 x float> %5, <4 x float>* %d
  ; CHECK-DAG: st.w [[R2]], 0($4)

  ret void
  ; CHECK: .size fmul_fsub_v4f32
}

; %a - (%b * %c) on <2 x double>: the patterns expect a separate fmul.d and
; fsub.d, with the product and the difference reusing the register loaded
; from %c.
define void @fmul_fsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                             <2 x double>* %c) nounwind {
  ; CHECK: fmul_fsub_v2f64:

  %1 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($6)
  %2 = load <2 x double>, <2 x double>* %c
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($7)
  %3 = fmul <2 x double> %1, %2
  ; CHECK-DAG: fmul.d [[R2]], [[R1]], [[R2]]
  %4 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($5)
  %5 = fsub <2 x double> %4, %3
  ; CHECK-DAG: fsub.d [[R2]], [[R3]], [[R2]]
  store <2 x double> %5, <2 x double>* %d
  ; CHECK-DAG: st.d [[R2]], 0($4)

  ret void
  ; CHECK: .size fmul_fsub_v2f64
}

; fdiv on <4 x float> is expected to select fdiv.w.
define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: fdiv_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <4 x float> %1, %2
  ; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v4f32
}

; fdiv on <2 x double> is expected to select fdiv.d.
define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: fdiv_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <2 x double> %1, %2
  ; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v2f64
}

; llvm.fabs on <4 x float> is expected to select fmax_a.w with the input used
; as both source operands.
define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fabs_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.fabs.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v4f32
}

; llvm.fabs on <2 x double> is expected to select fmax_a.d with the input used
; as both source operands.
define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fabs_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.fabs.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v2f64
}

; llvm.exp2 on <4 x float> is expected to expand to ldi.w 1, ffint_u.w on that
; value, and fexp2.w taking the converted value and the loaded input.
define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32
}

; llvm.exp2 on <2 x double> is expected to expand to ldi.d 1, ffint_u.d on that
; value, and fexp2.d taking the converted value and the loaded input.
define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.d [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64
}

; 2.0 * llvm.exp2(%a) on <4 x float>.
; NOTE(review): the patterns below expect the fexp2.w result to be stored
; directly while its scale operand is built from ldi.w 1 + ffint_u.w, which
; looks like 1.0 rather than the 2.0 used in the IR. Confirm the expected
; sequence against real llc output.
define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32_2:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  %3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32_2
}

; 2.0 * llvm.exp2(%a) on <2 x double>.
; NOTE(review): the patterns below expect the fexp2.d result to be stored
; directly while its scale operand is built from ldi.d 1 + ffint_u.d, which
; looks like 1.0 rather than the 2.0 used in the IR. Confirm the expected
; sequence against real llc output.
define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64_2:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  %3 = fmul <2 x double> <double 2.0, double 2.0>, %2
  ; CHECK-DAG: ldi.d [[R2:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R2]]
  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64_2
}

; llvm.sqrt on <4 x float> is expected to select fsqrt.w.
define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fsqrt_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v4f32
}

; llvm.sqrt on <2 x double> is expected to select fsqrt.d.
define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fsqrt_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v2f64
}

; uitofp <4 x i32> to <4 x float> is expected to select ffint_u.w.
define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_u_v4f32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v4f32
}

; uitofp <2 x i64> to <2 x double> is expected to select ffint_u.d.
define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_u_v2f64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v2f64
}

; sitofp <4 x i32> to <4 x float> is expected to select ffint_s.w.
define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_s_v4f32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v4f32
}

; sitofp <2 x i64> to <2 x double> is expected to select ffint_s.d.
define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_s_v2f64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v2f64
}

; fptoui <4 x float> to <4 x i32> is expected to select ftrunc_u.w.
define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_u_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v4f32
}

; fptoui <2 x double> to <2 x i64> is expected to select ftrunc_u.d.
define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_u_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v2f64
}

; fptosi <4 x float> to <4 x i32> is expected to select ftrunc_s.w.
define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_s_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v4f32
}

; fptosi <2 x double> to <2 x i64> is expected to select ftrunc_s.d.
define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_s_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v2f64
}

; Intrinsics called by the functions above.
declare <4 x float>  @llvm.fabs.v4f32(<4 x float>  %Val)
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %Val)
declare <4 x float>  @llvm.exp2.v4f32(<4 x float>  %val)
declare <2 x double> @llvm.exp2.v2f64(<2 x double> %val)
declare <4 x float>  @llvm.fma.v4f32(<4 x float>  %a, <4 x float>  %b,
                                     <4 x float>  %c)
declare <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b,
                                     <2 x double> %c)
declare <4 x float>  @llvm.sqrt.v4f32(<4 x float>  %Val)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val)