; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
; rdar://13082402

; t1-t4: the integer is loaded straight into an FP register and converted
; in place (signed and unsigned, 32-bit and 64-bit).
define float @t1(i32* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t1:
; CHECK: ldr s0, [x0]
; CHECK: scvtf s0, s0
  %tmp1 = load i32* %src, align 4
  %tmp2 = sitofp i32 %tmp1 to float
  ret float %tmp2
}

define float @t2(i32* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t2:
; CHECK: ldr s0, [x0]
; CHECK: ucvtf s0, s0
  %tmp1 = load i32* %src, align 4
  %tmp2 = uitofp i32 %tmp1 to float
  ret float %tmp2
}

define double @t3(i64* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t3:
; CHECK: ldr d0, [x0]
; CHECK: scvtf d0, d0
  %tmp1 = load i64* %src, align 4
  %tmp2 = sitofp i64 %tmp1 to double
  ret double %tmp2
}

define double @t4(i64* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t4:
; CHECK: ldr d0, [x0]
; CHECK: ucvtf d0, d0
  %tmp1 = load i64* %src, align 4
  %tmp2 = uitofp i64 %tmp1 to double
  ret double %tmp2
}

; rdar://13136456
; With optsize, the i32 is loaded into a GPR and converted from there.
define double @t5(i32* nocapture %src) nounwind ssp optsize {
entry:
; CHECK-LABEL: t5:
; CHECK: ldr [[REG:w[0-9]+]], [x0]
; CHECK: scvtf d0, [[REG]]
  %tmp1 = load i32* %src, align 4
  %tmp2 = sitofp i32 %tmp1 to double
  ret double %tmp2
}

; Check that we load in FP register when we want to convert into
; floating point value.
; This is much faster than loading on GPR and making the conversion
; GPR -> FPR.
; <rdar://problem/14599607>
;
; Check the following patterns for signed/unsigned:
; 1. load with scaled imm to float.
; 2. load with scaled register to float.
; 3. load with scaled imm to double.
; 4. load with scaled register to double.
; 5. load with unscaled imm to float.
; 6. load with unscaled imm to double.
; With loading size: 8, 16, 32, and 64-bits.

; ********* 1. load with scaled imm to float. *********
define float @fct1(i8* nocapture %sp0) {
; CHECK-LABEL: fct1:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @fct2(i16* nocapture %sp0) {
; CHECK-LABEL: fct2:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @fct3(i32* nocapture %sp0) {
; CHECK-LABEL: fct3:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @fct4(i64* nocapture %sp0) {
; CHECK-LABEL: fct4:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; ********* 2. load with scaled register to float. *********
define float @fct5(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct5:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @fct6(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct6:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @fct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct7:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @fct8(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct8:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}


; ********* 3. load with scaled imm to double. *********
define double @fct9(i8* nocapture %sp0) {
; CHECK-LABEL: fct9:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct10(i16* nocapture %sp0) {
; CHECK-LABEL: fct10:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct11(i32* nocapture %sp0) {
; CHECK-LABEL: fct11:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct12(i64* nocapture %sp0) {
; CHECK-LABEL: fct12:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; ********* 4. load with scaled register to double. *********
define double @fct13(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct13:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct14(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct14:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct15(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct15:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct16:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; ********* 5. load with unscaled imm to float. *********
; The address is built with ptrtoint/add/inttoptr so that the byte offset
; (-1 or 1) is not a multiple of the access size.
define float @fct17(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: fct17:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @fct18(i16* nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @fct19(i32* nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @fct20(i64* nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i

}

; ********* 6. load with unscaled imm to double. *********
define double @fct21(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: fct21:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct22(i16* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct23(i32* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct24(i64* nocapture %sp0) {
; CHECK-LABEL: fct24:
; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i

}

; Signed variants of the six patterns. Where the loaded integer is narrower
; than the conversion's source width, the expected code widens it in the
; vector register with sshll before the scvtf. Where the widths already
; match, the scvtf must read the register that was just loaded.

; ********* 1s. load with scaled imm to float. *********
define float @sfct1(i8* nocapture %sp0) {
; CHECK-LABEL: sfct1:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @sfct2(i16* nocapture %sp0) {
; CHECK-LABEL: sfct2:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; No widening step here: the conversion source is the loaded register.
define float @sfct3(i32* nocapture %sp0) {
; CHECK-LABEL: sfct3:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct4(i64* nocapture %sp0) {
; CHECK-LABEL: sfct4:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; ********* 2s. load with scaled register to float. *********
define float @sfct5(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct5:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @sfct6(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct6:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; No widening step here: the conversion source is the loaded register.
define float @sfct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct7:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct8(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct8:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; ********* 3s. load with scaled imm to double. *********
define double @sfct9(i8* nocapture %sp0) {
; CHECK-LABEL: sfct9:
; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct10(i16* nocapture %sp0) {
; CHECK-LABEL: sfct10:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct11(i32* nocapture %sp0) {
; CHECK-LABEL: sfct11:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; No widening step here: the conversion source is the loaded register.
define double @sfct12(i64* nocapture %sp0) {
; CHECK-LABEL: sfct12:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; ********* 4s. load with scaled register to double. *********
define double @sfct13(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct13:
; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct14(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct14:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct15(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct15:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; No widening step here: the conversion source is the loaded register.
define double @sfct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct16:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; ********* 5s. load with unscaled imm to float. *********
define float @sfct17(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct17:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @sfct18(i16* nocapture %sp0) {
; CHECK-LABEL: sfct18:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; No widening step here: the conversion source is the loaded register.
define float @sfct19(i32* nocapture %sp0) {
; CHECK-LABEL: sfct19:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct20(i64* nocapture %sp0) {
; CHECK-LABEL: sfct20:
; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i

}

; ********* 6s. load with unscaled imm to double. *********
define double @sfct21(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct21:
; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct22(i16* nocapture %sp0) {
; CHECK-LABEL: sfct22:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct23(i32* nocapture %sp0) {
; CHECK-LABEL: sfct23:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; No widening step here: the conversion source is the loaded register.
define double @sfct24(i64* nocapture %sp0) {
; CHECK-LABEL: sfct24:
; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i

}

; Check that we do not use SSHLL code sequence when code size is a concern.
define float @codesize_sfct17(i8* nocapture %sp0) optsize {
entry:
; CHECK-LABEL: codesize_sfct17:
; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; The label below names this function in full, so it cannot be confused with
; the plain sfct11 test earlier in the file.
define double @codesize_sfct11(i32* nocapture %sp0) minsize {
; CHECK-LABEL: codesize_sfct11:
; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Adding fp128 custom lowering makes these a little fragile since we have to
; return the correct mix of Legal/Expand from the custom method.
;
; rdar://problem/14991489

; Conversions to or from i128 are expected to become runtime library calls.
; The optional leading underscore in each pattern allows for targets that
; prefix C symbols with '_'.
define float @float_from_i128(i128 %in) {
; CHECK-LABEL: float_from_i128:
; CHECK: bl {{_?__floatuntisf}}
  %conv = uitofp i128 %in to float
  ret float %conv
}

define double @double_from_i128(i128 %in) {
; CHECK-LABEL: double_from_i128:
; CHECK: bl {{_?__floattidf}}
  %conv = sitofp i128 %in to double
  ret double %conv
}

define fp128 @fp128_from_i128(i128 %in) {
; CHECK-LABEL: fp128_from_i128:
; CHECK: bl {{_?__floatuntitf}}
  %conv = uitofp i128 %in to fp128
  ret fp128 %conv
}

; The labels below carry the trailing ':' like every other label in this
; file, so they anchor on the function label itself.
define i128 @i128_from_float(float %in) {
; CHECK-LABEL: i128_from_float:
; CHECK: bl {{_?__fixsfti}}
  %conv = fptosi float %in to i128
  ret i128 %conv
}

define i128 @i128_from_double(double %in) {
; CHECK-LABEL: i128_from_double:
; CHECK: bl {{_?__fixunsdfti}}
  %conv = fptoui double %in to i128
  ret i128 %conv
}

define i128 @i128_from_fp128(fp128 %in) {
; CHECK-LABEL: i128_from_fp128:
; CHECK: bl {{_?__fixtfti}}
  %conv = fptosi fp128 %in to i128
  ret i128 %conv
}