; RUN: llc < %s -march=arm64 -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck %s
; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 | FileCheck --check-prefix=CHECK-A57 %s
; rdar://13082402

; Signed i32 -> float. The checks expect the word to be loaded straight into
; s0 and converted in place.
define float @t1(i32* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t1:
; CHECK: ldr s0, [x0]
; CHECK: scvtf s0, s0
  %word = load i32, i32* %src, align 4
  %fp = sitofp i32 %word to float
  ret float %fp
}

; Unsigned i32 -> float, same shape as t1 but with ucvtf.
define float @t2(i32* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t2:
; CHECK: ldr s0, [x0]
; CHECK: ucvtf s0, s0
  %word = load i32, i32* %src, align 4
  %fp = uitofp i32 %word to float
  ret float %fp
}

; Signed i64 -> double. The checks expect the load to target d0 directly.
define double @t3(i64* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t3:
; CHECK: ldr d0, [x0]
; CHECK: scvtf d0, d0
  %dword = load i64, i64* %src, align 4
  %fp = sitofp i64 %dword to double
  ret double %fp
}

; Unsigned i64 -> double, same shape as t3 but with ucvtf.
define double @t4(i64* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t4:
; CHECK: ldr d0, [x0]
; CHECK: ucvtf d0, d0
  %dword = load i64, i64* %src, align 4
  %fp = uitofp i64 %dword to double
  ret double %fp
}

; rdar://13136456
; Signed i32 -> double in an optsize function. Here the checks expect a load
; into a w register followed by a GPR -> FPR scvtf.
define double @t5(i32* nocapture %src) nounwind ssp optsize {
entry:
; CHECK-LABEL: t5:
; CHECK: ldr [[REG:w[0-9]+]], [x0]
; CHECK: scvtf d0, [[REG]]
  %word = load i32, i32* %src, align 4
  %fp = sitofp i32 %word to double
  ret double %fp
}

; Check that the value is loaded into an FP register when it is about to be
; converted to a floating point value.
; This is much faster than loading into a GPR and then doing the conversion
; GPR -> FPR.
; <rdar://problem/14599607>
;
; The following patterns are checked for signed/unsigned:
; 1. load with scaled imm to float.
; 2. load with scaled register to float.
; 3. load with scaled imm to double.
; 4. load with scaled register to double.
; 5. load with unscaled imm to float.
; 6. load with unscaled imm to double.
; With loading size: 8, 16, 32, and 64-bits.
; ********* 1. (unsigned) load with scaled imm to float. *********
; i8 element at index 1 -> float.
define float @fct1(i8* nocapture %sp0) {
; CHECK-LABEL: fct1:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8, i8* %sp0, i64 1
  %elt = load i8, i8* %ptr, align 1
  %cvt = uitofp i8 %elt to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; i16 element at index 1 -> float.
define float @fct2(i16* nocapture %sp0) {
; CHECK-LABEL: fct2:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16, i16* %sp0, i64 1
  %elt = load i16, i16* %ptr, align 1
  %cvt = uitofp i16 %elt to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; i32 element at index 1 -> float.
define float @fct3(i32* nocapture %sp0) {
; CHECK-LABEL: fct3:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32, i32* %sp0, i64 1
  %elt = load i32, i32* %ptr, align 1
  %cvt = uitofp i32 %elt to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
; The checks therefore expect the load to go to an x register.
define float @fct4(i64* nocapture %sp0) {
; CHECK-LABEL: fct4:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64, i64* %sp0, i64 1
  %elt = load i64, i64* %ptr, align 1
  %cvt = uitofp i64 %elt to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; ********* 2. (unsigned) load with scaled register to float. *********
; i8 element at a variable index -> float.
define float @fct5(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct5:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8, i8* %sp0, i64 %offset
  %elt = load i8, i8* %ptr, align 1
  %cvt = uitofp i8 %elt to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; i16 element at a variable index -> float.
define float @fct6(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct6:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16, i16* %sp0, i64 %offset
  %elt = load i16, i16* %ptr, align 1
  %cvt = uitofp i16 %elt to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; i32 element at a variable index -> float.
define float @fct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct7:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32, i32* %sp0, i64 %offset
  %elt = load i32, i32* %ptr, align 1
  %cvt = uitofp i32 %elt to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
; The checks therefore expect the load to go to an x register.
define float @fct8(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct8:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64, i64* %sp0, i64 %offset
  %elt = load i64, i64* %ptr, align 1
  %cvt = uitofp i64 %elt to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}


; ********* 3. (unsigned) load with scaled imm to double. *********
; i8 element at index 1 -> double.
define double @fct9(i8* nocapture %sp0) {
; CHECK-LABEL: fct9:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8, i8* %sp0, i64 1
  %elt = load i8, i8* %ptr, align 1
  %cvt = uitofp i8 %elt to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

; i16 element at index 1 -> double.
define double @fct10(i16* nocapture %sp0) {
; CHECK-LABEL: fct10:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16, i16* %sp0, i64 1
  %elt = load i16, i16* %ptr, align 1
  %cvt = uitofp i16 %elt to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

; i32 element at index 1 -> double.
define double @fct11(i32* nocapture %sp0) {
; CHECK-LABEL: fct11:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32, i32* %sp0, i64 1
  %elt = load i32, i32* %ptr, align 1
  %cvt = uitofp i32 %elt to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

; i64 element at index 1 -> double.
define double @fct12(i64* nocapture %sp0) {
; CHECK-LABEL: fct12:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64, i64* %sp0, i64 1
  %elt = load i64, i64* %ptr, align 1
  %cvt = uitofp i64 %elt to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

; ********* 4. (unsigned) load with scaled register to double. *********
; i8 element at a variable index -> double.
define double @fct13(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct13:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8, i8* %sp0, i64 %offset
  %elt = load i8, i8* %ptr, align 1
  %cvt = uitofp i8 %elt to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

; i16 element at a variable index -> double.
define double @fct14(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct14:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16, i16* %sp0, i64 %offset
  %elt = load i16, i16* %ptr, align 1
  %cvt = uitofp i16 %elt to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

; i32 element at a variable index -> double.
define double @fct15(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct15:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32, i32* %sp0, i64 %offset
  %elt = load i32, i32* %ptr, align 1
  %cvt = uitofp i32 %elt to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

; i64 element at a variable index -> double.
define double @fct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct16:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64, i64* %sp0, i64 %offset
  %elt = load i64, i64* %ptr, align 1
  %cvt = uitofp i64 %elt to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

; ********* 5. (unsigned) load with unscaled imm to float. *********
; i8 at byte offset -1 from the pointer -> float.
define float @fct17(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: fct17:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i8* %sp0 to i64
  %sum = add i64 %base, -1
  %ptr = inttoptr i64 %sum to i8*
  %elt = load i8, i8* %ptr, align 1
  %cvt = uitofp i8 %elt to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; i16 at byte offset 1 from the pointer -> float.
define float @fct18(i16* nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i16* %sp0 to i64
  %sum = add i64 %base, 1
  %ptr = inttoptr i64 %sum to i16*
  %elt = load i16, i16* %ptr, align 1
  %cvt = uitofp i16 %elt to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; i32 at byte offset 1 from the pointer -> float.
define float @fct19(i32* nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i32* %sp0 to i64
  %sum = add i64 %base, 1
  %ptr = inttoptr i64 %sum to i32*
  %elt = load i32, i32* %ptr, align 1
  %cvt = uitofp i32 %elt to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
; Unsigned i64 at byte offset 1 -> float. The checks expect the load to go to
; an x register and the conversion to read that register.
define float @fct20(i64* nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i

}

; ********* 6. load with unscaled imm to double. *********
define double @fct21(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: fct21:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct22(i16* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct23(i32* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct24(i64* nocapture %sp0) {
; CHECK-LABEL: fct24:
; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i

}

; ********* 1s. load with scaled imm to float. *********
; Signed i8 -> float. The default-prefix checks expect an FP-register load
; widened with two sshll steps; the A57 checks expect a sign-extending GPR load.
define float @sfct1(i8* nocapture %sp0) {
; CHECK-LABEL: sfct1:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct1:
; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @sfct2(i16* nocapture %sp0) {
; CHECK-LABEL: sfct2:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; No widening step is expected for i32 -> float, so the conversion is checked
; against the register that was just loaded.
define float @sfct3(i32* nocapture %sp0) {
; CHECK-LABEL: sfct3:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct4(i64* nocapture %sp0) {
; CHECK-LABEL: sfct4:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; ********* 2s. load with scaled register to float. *********
define float @sfct5(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct5:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct5:
; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @sfct6(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct6:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; No widening step is expected for i32 -> float, so the conversion is checked
; against the register that was just loaded.
define float @sfct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct7:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct8(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct8:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; ********* 3s. load with scaled imm to double. *********
; Signed i8 -> double: the checks expect a sign-extending GPR load here.
define double @sfct9(i8* nocapture %sp0) {
; CHECK-LABEL: sfct9:
; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct10(i16* nocapture %sp0) {
; CHECK-LABEL: sfct10:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct10:
; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct11(i32* nocapture %sp0) {
; CHECK-LABEL: sfct11:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; No widening step is expected for i64 -> double, so the conversion is checked
; against the register that was just loaded.
define double @sfct12(i64* nocapture %sp0) {
; CHECK-LABEL: sfct12:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; ********* 4s. load with scaled register to double. *********
define double @sfct13(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct13:
; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct14(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct14:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct14:
; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct15(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct15:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; No widening step is expected for i64 -> double, so the conversion is checked
; against the register that was just loaded.
define double @sfct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct16:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; ********* 5s. load with unscaled imm to float. *********
define float @sfct17(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct17:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct17:
; CHECK-A57: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @sfct18(i16* nocapture %sp0) {
; CHECK-LABEL: sfct18:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; No widening step is expected for i32 -> float, so the conversion is checked
; against the register that was just loaded.
define float @sfct19(i32* nocapture %sp0) {
; CHECK-LABEL: sfct19:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct20(i64* nocapture %sp0) {
; CHECK-LABEL: sfct20:
; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i

}

; ********* 6s. load with unscaled imm to double. *********
define double @sfct21(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct21:
; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct22(i16* nocapture %sp0) {
; CHECK-LABEL: sfct22:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct22:
; CHECK-A57: ldursh w[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct23(i32* nocapture %sp0) {
; CHECK-LABEL: sfct23:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; No widening step is expected for i64 -> double, so the conversion is checked
; against the register that was just loaded.
define double @sfct24(i64* nocapture %sp0) {
; CHECK-LABEL: sfct24:
; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i

}

; Check that we do not use SSHLL code sequence when code size is a concern.
define float @codesize_sfct17(i8* nocapture %sp0) optsize {
entry:
; CHECK-LABEL: codesize_sfct17:
; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; minsize variant of the signed i32 -> double case. The label check names this
; function in full so it cannot be confused with the plain sfct11 label.
define double @codesize_sfct11(i32* nocapture %sp0) minsize {
; CHECK-LABEL: codesize_sfct11:
; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Adding fp128 custom lowering makes these a little fragile since we have to
; return the correct mix of Legal/Expand from the custom method.
;
; rdar://problem/14991489

; Unsigned i128 -> float is expected to become a call to __floatuntisf.
define float @float_from_i128(i128 %in) {
; CHECK-LABEL: float_from_i128:
; CHECK: bl {{_?__floatuntisf}}
  %conv = uitofp i128 %in to float
  ret float %conv
}

; Signed i128 -> double is expected to become a call to __floattidf.
define double @double_from_i128(i128 %in) {
; CHECK-LABEL: double_from_i128:
; CHECK: bl {{_?__floattidf}}
  %conv = sitofp i128 %in to double
  ret double %conv
}

; Unsigned i128 -> fp128 is expected to become a call to __floatuntitf.
define fp128 @fp128_from_i128(i128 %in) {
; CHECK-LABEL: fp128_from_i128:
; CHECK: bl {{_?__floatuntitf}}
  %conv = uitofp i128 %in to fp128
  ret fp128 %conv
}

; float -> signed i128 is expected to become a call to __fixsfti.
; The label check carries the trailing colon so it anchors on the label itself.
define i128 @i128_from_float(float %in) {
; CHECK-LABEL: i128_from_float:
; CHECK: bl {{_?__fixsfti}}
  %conv = fptosi float %in to i128
  ret i128 %conv
}

; double -> unsigned i128 is expected to become a call to __fixunsdfti.
define i128 @i128_from_double(double %in) {
; CHECK-LABEL: i128_from_double:
; CHECK: bl {{_?__fixunsdfti}}
  %conv = fptoui double %in to i128
  ret i128 %conv
}

; fp128 -> signed i128 is expected to become a call to __fixtfti.
define i128 @i128_from_fp128(fp128 %in) {
; CHECK-LABEL: i128_from_fp128:
; CHECK: bl {{_?__fixtfti}}
  %conv = fptosi fp128 %in to i128
  ret i128 %conv
}