; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin -no-integrated-as | FileCheck %s

; Tests for x87 floating-point stack handling around inline asm: the functions
; below pass values to or from inline asm through the x87 register stack, and
; the directives verify which fld/fstp instructions surround the asm.

; An asm result produced in st(0) is returned directly, so there should be no
; stack manipulations between the inline asm and ret.
; CHECK: test1
; CHECK: InlineAsm End
; CHECK-NEXT: ret
define x86_fp80 @test1() {
  %top = call x86_fp80 asm sideeffect "fld0", "={st(0)}"()
  ret x86_fp80 %top
}

; Same as test1, with a double result.
; CHECK: test2
; CHECK: InlineAsm End
; CHECK-NEXT: ret
define double @test2() {
  %top = call double asm sideeffect "fld0", "={st(0)}"()
  ret double %top
}

; Setting up the argument in st(0) should take a single fld.
; CHECK: test3
; CHECK: fld
; CHECK-NEXT: InlineAsm Start
; The asm lists st as clobbered, i.e. it consumes the value, so nothing should
; be popped afterwards.
; CHECK: InlineAsm End
; CHECK-NOT: fstp
; CHECK: ret
define void @test3(x86_fp80 %X) {
  call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %X)
  ret void
}

; Same as test3, with a double argument.
; CHECK: test4
; CHECK: fld
; CHECK-NEXT: InlineAsm Start
; CHECK: InlineAsm End
; CHECK-NOT: fstp
; CHECK: ret
define void @test4(double %X) {
  call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"(double %X)
  ret void
}

; Same as test3/4, but the asm input is the result of an fadd.
; The fadd can be done in xmm or x87 regs - we don't test that.
; CHECK: test5
; CHECK: InlineAsm End
; CHECK-NOT: fstp
; CHECK: ret
define void @test5(double %X) {
  %sum = fadd double %X, 123.0
  call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"(double %sum)
  ret void
}

; Four asm calls using the 'f' constraint; each group of directives counts the
; fstp instructions expected right after the corresponding asm.
; CHECK: test6
define void @test6(double %A, double %B, double %C, double %D, double %E) nounwind {
entry:
  ; Uses the same value twice, should have one fstp after the asm.
  ; CHECK: foo
  ; CHECK: InlineAsm End
  ; CHECK-NEXT: fstp
  ; CHECK-NOT: fstp
  tail call void asm sideeffect "foo $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"(double %A, double %A) nounwind
  ; Uses two different values, should be in st(0)/st(1) and both be popped.
  ; CHECK: bar
  ; CHECK: InlineAsm End
  ; CHECK-NEXT: fstp
  ; CHECK-NEXT: fstp
  tail call void asm sideeffect "bar $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"(double %B, double %C) nounwind
  ; Uses two different values. %D is not killed by this asm (the next asm uses
  ; it again), so it should not be popped here; only one fstp is expected.
  ; CHECK: baz
  ; CHECK: InlineAsm End
  ; CHECK-NEXT: fstp
  ; CHECK-NOT: fstp
  tail call void asm sideeffect "baz $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"(double %D, double %E) nounwind
  ; This is the last use of %D, so it should be popped after.
  ; CHECK: baz
  ; CHECK: InlineAsm End
  ; CHECK-NEXT: fstp
  ; CHECK-NOT: fstp
  ; CHECK: ret
  tail call void asm sideeffect "baz $0", "f,~{dirflag},~{fpsr},~{flags}"(double %D) nounwind
  ret void
}

; PR4185
; Passing a non-killed value to asm in {st}: the same constant feeds two asms,
; so make sure it is duped before the first one.
; Each asm kills st(0), so we shouldn't pop anything.
; CHECK: testPR4185
; CHECK: fld %st(0)
; CHECK: fistpl
; CHECK-NOT: fstp
; CHECK: fistpl
; CHECK-NOT: fstp
; CHECK: ret
; A valid alternative would be to remat the constant pool load before each
; inline asm.
define void @testPR4185() {
return:
  call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06)
  call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06)
  ret void
}

; Passing a non-killed value through asm in {st}: the first asm does not
; clobber st, so make sure the value is not duped before it.
; The second asm kills st(0), so we shouldn't pop anything.
; CHECK: testPR4185b
; CHECK-NOT: fld %st(0)
; CHECK: fistl
; CHECK-NOT: fstp
; CHECK: fistpl
; CHECK-NOT: fstp
; CHECK: ret
; A valid alternative would be to remat the constant pool load before each
; inline asm.
define void @testPR4185b() {
return:
  call void asm sideeffect "fistl $0", "{st}"(double 1.000000e+06)
  call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06)
  ret void
}

; PR4459
; The return value from ceil is used twice (by the asm, which clobbers st, and
; by the call to test3), so it must be duped before being consumed by the asm.
; CHECK: testPR4459
; CHECK: ceil
; CHECK: fld %st(0)
; CHECK-NOT: fxch
; CHECK: fistpl
; CHECK-NOT: fxch
; CHECK: fstpt
; CHECK: test
define void @testPR4459(x86_fp80 %a) {
entry:
  %0 = call x86_fp80 @ceil(x86_fp80 %a)
  call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %0)
  call void @test3(x86_fp80 %0)
  ret void
}
declare x86_fp80 @ceil(x86_fp80)

; PR4484
; test1 leaves a value on the stack that is needed after the asm.
; CHECK: testPR4484
; CHECK: calll _test1
; CHECK-NOT: fstp
; Load %a from the stack after the call to test1.
; CHECK: fldt
; CHECK-NOT: fxch
; CHECK: fistpl
; CHECK-NOT: fstp
; Set up the call to test3.
; CHECK: fstpt
; CHECK: test
define void @testPR4484(x86_fp80 %a) {
entry:
  %0 = call x86_fp80 @test1()
  call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %a)
  call void @test3(x86_fp80 %0)
  ret void
}

; PR4485
; Only the function label is checked; no directive constrains the body.
; NOTE(review): presumably this only has to compile without crashing - confirm
; against the PR.
; CHECK: testPR4485
define void @testPR4485(x86_fp80* %a) {
entry:
  %0 = load x86_fp80, x86_fp80* %a, align 16
  %1 = fmul x86_fp80 %0, 0xK4006B400000000000000
  %2 = fmul x86_fp80 %1, 0xK4012F424000000000000
  tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %2)
  %3 = load x86_fp80, x86_fp80* %a, align 16
  %4 = fmul x86_fp80 %3, 0xK4006B400000000000000
  %5 = fmul x86_fp80 %4, 0xK4012F424000000000000
  tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %5)
  ret void
}

; An input argument in a fixed position is implicitly popped by the asm only if
; the input argument is tied to an output register, or it is in the clobber list.
; The clobber list case is tested above.
;
; This doesn't implicitly pop the stack:
;
; void fist1(long double x, int *p) {
;   asm volatile ("fistl %1" : : "t"(x), "m"(*p));
; }
;
; CHECK: fist1
; CHECK: fldt
; CHECK: fistl (%e
; CHECK: fstp
; CHECK: ret
define void @fist1(x86_fp80 %x, i32* %p) nounwind ssp {
entry:
  tail call void asm sideeffect "fistl $1", "{st},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, i32* %p) nounwind
  ret void
}

; Here, the input operand is tied to an output, which means that it is
; implicitly popped (and then the output is implicitly pushed).
;
; long double fist2(long double x, int *p) {
;   long double y;
;   asm ("fistl %1" : "=&t"(y) : "0"(x), "m"(*p) : "memory");
;   return y;
; }
;
; CHECK: fist2
; CHECK: fldt
; CHECK: fistl (%e
; CHECK-NOT: fstp
; CHECK: ret
define x86_fp80 @fist2(x86_fp80 %x, i32* %p) nounwind ssp {
entry:
  %0 = tail call x86_fp80 asm "fistl $2", "=&{st},0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, i32* %p) nounwind
  ret x86_fp80 %0
}

; An 'f' constraint is never implicitly popped:
;
; void fucomp1(long double x, long double y) {
;   asm volatile ("fucomp %1" : : "t"(x), "f"(y) : "st");
; }
; CHECK: fucomp1
; CHECK: fldt
; CHECK: fldt
; CHECK: fucomp %st
; CHECK: fstp
; CHECK-NOT: fstp
; CHECK: ret
define void @fucomp1(x86_fp80 %x, x86_fp80 %y) nounwind ssp {
entry:
  tail call void asm sideeffect "fucomp $1", "{st},f,~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind
  ret void
}

; The 'u' constraint is only popped implicitly when clobbered:
;
; void fucomp2(long double x, long double y) {
;   asm volatile ("fucomp %1" : : "t"(x), "u"(y) : "st");
; }
;
; void fucomp3(long double x, long double y) {
;   asm volatile ("fucompp %1" : : "t"(x), "u"(y) : "st", "st(1)");
; }
;
; fucomp2 clobbers only st, so one fstp remains after the asm.
; CHECK: fucomp2
; CHECK: fldt
; CHECK: fldt
; CHECK: fucomp %st(1)
; CHECK: fstp
; CHECK-NOT: fstp
; CHECK: ret
;
; fucomp3 clobbers both st and st(1), so no fstp is expected.
; CHECK: fucomp3
; CHECK: fldt
; CHECK: fldt
; CHECK: fucompp %st(1)
; CHECK-NOT: fstp
; CHECK: ret
define void @fucomp2(x86_fp80 %x, x86_fp80 %y) nounwind ssp {
entry:
  tail call void asm sideeffect "fucomp $1", "{st},{st(1)},~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind
  ret void
}
define void @fucomp3(x86_fp80 %x, x86_fp80 %y) nounwind ssp {
entry:
  tail call void asm sideeffect "fucompp $1", "{st},{st(1)},~{st},~{st(1)},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind
  ret void
}

; One input, two outputs, one dead output: only element 0 (the st output) of
; the asm result is used.
%complex = type { float, float }
; CHECK: sincos1
; CHECK: flds
; CHECK-NOT: fxch
; CHECK: sincos
; CHECK-NOT: fstp
; CHECK: fstp %st(1)
; CHECK-NOT: fstp
; CHECK: ret
define float @sincos1(float %x) nounwind ssp {
entry:
  %0 = tail call %complex asm "sincos", "={st},={st(1)},0,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
  %first = extractvalue %complex %0, 0
  ret float %first
}

; Same thing, swapped output operands: the st output is now element 1.
; CHECK: sincos2
; CHECK: flds
; CHECK-NOT: fxch
; CHECK: sincos
; CHECK-NOT: fstp
; CHECK: fstp %st(1)
; CHECK-NOT: fstp
; CHECK: ret
define float @sincos2(float %x) nounwind ssp {
entry:
  %0 = tail call %complex asm "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
  %second = extractvalue %complex %0, 1
  ret float %second
}

; Clobber st(0) after it was live-out/dead from the previous asm.
; CHECK: sincos3
; Load x, make a copy for the second asm.
; CHECK: flds
; CHECK: fld %st(0)
; CHECK: sincos
; Discard dead result in st(0), bring x to the top.
; CHECK: fstp %st(0)
; CHECK: fxch
; x is now in st(0) for the second asm.
; CHECK: sincos
; Discard both results of the second asm (%1 is never used).
; CHECK: fstp
; CHECK: fstp
; CHECK: ret
define float @sincos3(float %x) nounwind ssp {
entry:
  %0 = tail call %complex asm sideeffect "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
  %1 = tail call %complex asm sideeffect "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
  %first = extractvalue %complex %0, 0
  ret float %first
}

; Pass the same value in two fixed stack slots: the constant 2.0 is supplied
; for both the {st} and the {st(1)} operand.
; CHECK: PR10602
; CHECK: flds LCPI
; CHECK: fld %st(0)
; CHECK: fcomi %st(1), %st(0)
define i32 @PR10602() nounwind ssp {
entry:
  %0 = tail call i32 asm "fcomi $2, $1; pushf; pop $0", "=r,{st},{st(1)},~{dirflag},~{fpsr},~{flags}"(double 2.000000e+00, double 2.000000e+00) nounwind
  ret i32 %0
}

; <rdar://problem/16952634>
; X87 stackifier asserted when there was an ST register defined by an
; inline-asm instruction and the ST register was live across another
; inline-asm instruction.
;
;   INLINEASM <es:frndint> [sideeffect] [attdialect], $0:[regdef], %ST0<imp-def,tied5>, $1:[reguse tiedto:$0], %ST0<tied3>, $2:[clobber], %EFLAGS<earlyclobber,imp-def,dead>
;   INLINEASM <es:fldcw $0> [sideeffect] [mayload] [attdialect], $0:[mem], %EAX<undef>, 1, %noreg, 0, %noreg, $1:[clobber], %EFLAGS<earlyclobber,imp-def,dead>
;   %FP0<def> = COPY %ST0

; Both asm blocks must be emitted, frndint first and fldcw second.
; CHECK-LABEL: _test_live_st
; CHECK: ## InlineAsm Start
; CHECK: frndint
; CHECK: ## InlineAsm End
; CHECK: ## InlineAsm Start
; CHECK: fldcw
; CHECK: ## InlineAsm End

%struct.fpu_t = type { [8 x x86_fp80], x86_fp80, %struct.anon1, %struct.anon2, i32, i8, [15 x i8] }
%struct.anon1 = type { i32, i32, i32 }
%struct.anon2 = type { i32, i32, i32, i32 }

@fpu = external global %struct.fpu_t, align 16

; %1 is defined in st by the frndint asm and is still live across the fldcw
; asm, because the phi in the exit block uses it.
define void @test_live_st(i32 %a1) {
entry:
  %0 = load x86_fp80, x86_fp80* undef, align 16
  %is_one = icmp eq i32 %a1, 1
  br i1 %is_one, label %sw.bb4.i, label %_Z5tointRKe.exit

sw.bb4.i:
  %1 = call x86_fp80 asm sideeffect "frndint", "={st},0,~{dirflag},~{fpsr},~{flags}"(x86_fp80 %0)
  call void asm sideeffect "fldcw $0", "*m,~{dirflag},~{fpsr},~{flags}"(i32* undef)
  br label %_Z5tointRKe.exit

_Z5tointRKe.exit:
  %value = phi x86_fp80 [ %1, %sw.bb4.i ], [ %0, %entry ]
  %as_int = fptosi x86_fp80 %value to i32
  %as_fp80 = sitofp i32 %as_int to x86_fp80
  store x86_fp80 %as_fp80, x86_fp80* getelementptr inbounds (%struct.fpu_t, %struct.fpu_t* @fpu, i32 0, i32 1)
  br label %return

return:
  ret void
}

; Check that x87 stackifier is correctly rewriting FP registers to ST registers.
;
; The asm text names both outputs ($0 constrained to st, $1 to st(1)); the
; emitted asm must print them as %st(0) and %st(1).
; CHECK-LABEL: _test_operand_rewrite
; CHECK: ## InlineAsm Start
; CHECK: foo %st(0), %st(1)
; CHECK: ## InlineAsm End

define double @test_operand_rewrite() {
entry:
  %0 = tail call { double, double } asm sideeffect "foo $0, $1", "={st},={st(1)},~{dirflag},~{fpsr},~{flags}"()
  %out0 = extractvalue { double, double } %0, 0
  %out1 = extractvalue { double, double } %0, 1
  %diff = fsub double %out0, %out1
  ret double %diff
}