; This tests the NaCl intrinsics not related to atomic operations.

; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN:   --target x8632 --sandbox -i %s --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN:   --target x8632 --sandbox -i %s --args -Om1 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_X8632 --command FileCheck %s

; Do another run w/ O2 and a different check-prefix (otherwise O2 and Om1
; share the same "CHECK" prefix). This separate run helps check that
; some code is optimized out.
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN:   --target x8632 --sandbox -i %s --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_X8632 \
; RUN:   --command FileCheck --check-prefix=CHECKO2REM %s

; Do O2 runs without -sandbox to make sure llvm.nacl.read.tp gets
; lowered to __nacl_read_tp instead of gs:0x0.
; We also know that because it's O2, it'll have the O2REM optimizations.
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN:   --target x8632 -i %s --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_X8632 \
; RUN:   --command FileCheck --check-prefix=CHECKO2UNSANDBOXEDREM %s

; RUN: %if --need=target_ARM32 \
; RUN:   --command %p2i --filetype=obj --disassemble --target arm32 \
; RUN:   -i %s --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_ARM32 \
; RUN:   --command FileCheck --check-prefix ARM32 %s

; RUN: %if --need=target_MIPS32 --need=allow_dump \
; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target mips32\
; RUN:   -i %s --args -Om1 --skip-unimplemented \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
; RUN:   --command FileCheck --check-prefix MIPS32 %s

; Intrinsics exercised by the test functions in this file.
declare i8* @llvm.nacl.read.tp()
declare void @llvm.nacl.longjmp(i8*, i32)
declare i32 @llvm.nacl.setjmp(i8*)
declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare void @llvm.trap()
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)
declare i8* @llvm.stacksave()
declare void @llvm.stackrestore(i8*)

; Reads the thread pointer once and returns it as an integer.
define internal i32 @test_nacl_read_tp() {
entry:
  %ptr = call i8* @llvm.nacl.read.tp()
  %__1 = ptrtoint i8* %ptr to i32
  ret i32 %__1
}
; CHECK-LABEL: test_nacl_read_tp
; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECKO2REM-LABEL: test_nacl_read_tp
; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp
; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp
; MIPS32-LABEL: test_nacl_read_tp
; MIPS32: jal {{.*}} __nacl_read_tp

; Reads the thread pointer twice and uses each result in address arithmetic
; feeding a load and a store.
define internal i32 @test_nacl_read_tp_more_addressing() {
entry:
  %ptr = call i8* @llvm.nacl.read.tp()
  %__1 = ptrtoint i8* %ptr to i32
  %x = add i32 %__1, %__1
  %__3 = inttoptr i32 %x to i32*
  %v = load i32, i32* %__3, align 1
  %v_add = add i32 %v, 1

  %ptr2 = call i8* @llvm.nacl.read.tp()
  %__6 = ptrtoint i8* %ptr2 to i32
  %y = add i32 %__6, 4
  %__8 = inttoptr i32 %y to i32*
  %v_add2 = add i32 %v, 4
  store i32 %v_add2, i32* %__8, align 1
  ret i32 %v
}
; CHECK-LABEL: test_nacl_read_tp_more_addressing
; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECKO2REM-LABEL: test_nacl_read_tp_more_addressing
; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_more_addressing
; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp
; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp
; MIPS32-LABEL: test_nacl_read_tp_more_addressing
; MIPS32: jal {{.*}} __nacl_read_tp

define internal i32 @test_nacl_read_tp_dead(i32 %a) {
entry:
  %ptr = call i8* @llvm.nacl.read.tp()
  ; The result of the nacl read tp call is never used, so in O2 mode
  ; the call should be DCE'ed.
  ret i32 %a
}
; Consider nacl.read.tp side-effect free, so it can be eliminated.
; The negative gs:0x0 pattern below uses the same operand spelling as the
; positive gs:0x0 patterns in this file (no space after the comma, optional
; "DWORD PTR"); with a space after the comma it could never match the
; disassembly and the check would pass vacuously.
; CHECKO2REM-LABEL: test_nacl_read_tp_dead
; CHECKO2REM-NOT: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_dead
; CHECKO2UNSANDBOXEDREM-NOT: call {{.*}} R_{{.*}} __nacl_read_tp
; MIPS32-LABEL: test_nacl_read_tp_dead
; MIPS32: jal {{.*}} __nacl_read_tp

; Calls setjmp, then longjmp on the zero-return path; both calls must be
; present in the output for every checked target.
define internal i32 @test_setjmplongjmp(i32 %iptr_env) {
entry:
  %env = inttoptr i32 %iptr_env to i8*
  %i = call i32 @llvm.nacl.setjmp(i8* %env)
  %r1 = icmp eq i32 %i, 0
  br i1 %r1, label %Zero, label %NonZero
Zero:
  ; Redundant inttoptr, to make --pnacl cast-eliding/re-insertion happy.
  %env2 = inttoptr i32 %iptr_env to i8*
  call void @llvm.nacl.longjmp(i8* %env2, i32 1)
  ret i32 0
NonZero:
  ret i32 1
}
; CHECK-LABEL: test_setjmplongjmp
; CHECK: call {{.*}} R_{{.*}} setjmp
; CHECK: call {{.*}} R_{{.*}} longjmp
; CHECKO2REM-LABEL: test_setjmplongjmp
; CHECKO2REM: call {{.*}} R_{{.*}} setjmp
; CHECKO2REM: call {{.*}} R_{{.*}} longjmp
; ARM32-LABEL: test_setjmplongjmp
; ARM32: bl {{.*}} setjmp
; ARM32: bl {{.*}} longjmp
; MIPS32-LABEL: test_setjmplongjmp
; MIPS32: jal {{.*}} setjmp
; MIPS32: jal {{.*}} longjmp

define internal i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) {
entry:
  %env = inttoptr i32 %iptr_env to i8*
  %i = call i32 @llvm.nacl.setjmp(i8* %env)
  ret i32 %i_other
}
; Don't consider setjmp side-effect free, so it's not eliminated if
; result unused.
; CHECKO2REM-LABEL: test_setjmp_unused
; CHECKO2REM: call {{.*}} R_{{.*}} setjmp
; MIPS32-LABEL: test_setjmp_unused
; MIPS32: jal {{.*}} setjmp

; Three f32 square roots: of the argument, of the previous result, and of
; the constant -0.0; the last two results are added together.
define internal float @test_sqrt_float(float %x, i32 %iptr) {
entry:
  %r = call float @llvm.sqrt.f32(float %x)
  %r2 = call float @llvm.sqrt.f32(float %r)
  %r3 = call float @llvm.sqrt.f32(float -0.0)
  %r4 = fadd float %r2, %r3
  ret float %r4
}
; CHECK-LABEL: test_sqrt_float
; CHECK: sqrtss xmm{{.*}}
; CHECK: sqrtss xmm{{.*}}
; CHECK: sqrtss xmm{{.*}},DWORD PTR
; ARM32-LABEL: test_sqrt_float
; ARM32: vsqrt.f32
; ARM32: vsqrt.f32
; ARM32: vsqrt.f32
; ARM32: vadd.f32
; MIPS32-LABEL: test_sqrt_float
; MIPS32: sqrt.s
; MIPS32: sqrt.s
; MIPS32: sqrt.s
; MIPS32: add.s

; Square root of a value loaded from memory.
define internal float @test_sqrt_float_mergeable_load(float %x, i32 %iptr) {
entry:
  %__2 = inttoptr i32 %iptr to float*
  %y = load float, float* %__2, align 4
  %r5 = call float @llvm.sqrt.f32(float %y)
  %r6 = fadd float %x, %r5
  ret float %r6
}
; CHECK-LABEL: test_sqrt_float_mergeable_load
; Folding the load into the sqrt would be possible in principle, but the
; current folding only handles load + arithmetic op, and the sqrt inst
; is treated as an intrinsic call rather than an arithmetic op.
; CHECK: sqrtss xmm{{.*}}
; ARM32-LABEL: test_sqrt_float_mergeable_load
; ARM32: vldr s{{.*}}
; ARM32: vsqrt.f32

; f64 counterpart of the three-sqrt test: argument, previous result, and
; the constant -0.0.
define internal double @test_sqrt_double(double %x, i32 %iptr) {
entry:
  %r = call double @llvm.sqrt.f64(double %x)
  %r2 = call double @llvm.sqrt.f64(double %r)
  %r3 = call double @llvm.sqrt.f64(double -0.0)
  %r4 = fadd double %r2, %r3
  ret double %r4
}
; CHECK-LABEL: test_sqrt_double
; CHECK: sqrtsd xmm{{.*}}
; CHECK: sqrtsd xmm{{.*}}
; CHECK: sqrtsd xmm{{.*}},QWORD PTR
; ARM32-LABEL: test_sqrt_double
; ARM32: vsqrt.f64
; ARM32: vsqrt.f64
; ARM32: vsqrt.f64
; ARM32: vadd.f64
; MIPS32-LABEL: test_sqrt_double
; MIPS32: sqrt.d
; MIPS32: sqrt.d
; MIPS32: sqrt.d
; MIPS32: add.d

; Square root of an f64 value loaded from memory.
define internal double @test_sqrt_double_mergeable_load(double %x, i32 %iptr) {
entry:
  %__2 = inttoptr i32 %iptr to double*
  %y = load double, double* %__2, align 8
  %r5 = call double @llvm.sqrt.f64(double %y)
  %r6 = fadd double %x, %r5
  ret double %r6
}
; CHECK-LABEL: test_sqrt_double_mergeable_load
; CHECK: sqrtsd xmm{{.*}}
; ARM32-LABEL: test_sqrt_double_mergeable_load
; ARM32: vldr d{{.*}}
; ARM32: vsqrt.f64

; Both sqrt results are unused; the O2 removal prefix expects neither
; sqrt instruction to be emitted.
define internal float @test_sqrt_ignored(float %x, double %y) {
entry:
  %ignored1 = call float @llvm.sqrt.f32(float %x)
  %ignored2 = call double @llvm.sqrt.f64(double %y)
  ret float 0.0
}
; CHECKO2REM-LABEL: test_sqrt_ignored
; CHECKO2REM-NOT: sqrtss
; CHECKO2REM-NOT: sqrtsd
; MIPS32-LABEL: test_sqrt_ignored
; MIPS32: sqrt.s
; MIPS32: sqrt.d

define internal float @test_fabs_float(float %x) {
entry:
  %r = call float @llvm.fabs.f32(float %x)
  %r2 = call float @llvm.fabs.f32(float %r)
  %r3 = call float @llvm.fabs.f32(float -0.0)
  %r4 = fadd float %r2, %r3
  ret float %r4
}
;;; Specifically verify that the pand instruction takes two xmm registers
;;; rather than trying to operate on a 32-bit (f32) memory operand.
; CHECK-LABEL: test_fabs_float
; CHECK: pcmpeqd
; CHECK: psrld
; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd
; CHECK: psrld
; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd
; CHECK: psrld
; CHECK: pand {{.*}}xmm{{.*}}xmm
; MIPS32-LABEL: test_fabs_float
; MIPS32: abs.s
; MIPS32: abs.s
; MIPS32: abs.s
; MIPS32: add.s

define internal double @test_fabs_double(double %x) {
entry:
  %r = call double @llvm.fabs.f64(double %x)
  %r2 = call double @llvm.fabs.f64(double %r)
  %r3 = call double @llvm.fabs.f64(double -0.0)
  %r4 = fadd double %r2, %r3
  ret double %r4
}
;;; Specifically verify that the pand instruction takes two xmm registers
;;; rather than trying to operate on a 64-bit (f64) memory operand.
; CHECK-LABEL: test_fabs_double
; CHECK: pcmpeqd
; CHECK: psrlq
; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd
; CHECK: psrlq
; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd
; CHECK: psrlq
; CHECK: pand {{.*}}xmm{{.*}}xmm
; MIPS32-LABEL: test_fabs_double
; MIPS32: abs.d
; MIPS32: abs.d
; MIPS32: abs.d
; MIPS32: add.d

; Vector fabs on the argument, on the previous result, and on undef.
define internal <4 x float> @test_fabs_v4f32(<4 x float> %x) {
entry:
  %r = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
  %r2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %r)
  %r3 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
  %r4 = fadd <4 x float> %r2, %r3
  ret <4 x float> %r4
}
; CHECK-LABEL: test_fabs_v4f32
; CHECK: pcmpeqd
; CHECK: psrld
; CHECK: pand
; CHECK: pcmpeqd
; CHECK: psrld
; CHECK: pand
; CHECK: pcmpeqd
; CHECK: psrld
; CHECK: pand

; Traps on the zero branch; the other branch returns normally.
define internal i32 @test_trap(i32 %br) {
entry:
  %r1 = icmp eq i32 %br, 0
  br i1 %r1, label %Zero, label %NonZero
Zero:
  call void @llvm.trap()
  unreachable
NonZero:
  ret i32 1
}
; CHECK-LABEL: test_trap
; CHECK: ud2
; ARM32-LABEL: test_trap
; ARM32: udf
; MIPS32-LABEL: test_trap
; MIPS32: teq zero,zero

; Byte-swaps the low 16 bits of the argument and zero-extends the result.
define internal i32 @test_bswap_16(i32 %x) {
entry:
  %x_trunc = trunc i32 %x to i16
  %r = call i16 @llvm.bswap.i16(i16 %x_trunc)
  %r_zext = zext i16 %r to i32
  ret i32 %r_zext
}
; CHECK-LABEL: test_bswap_16
; Make sure this is the right operand size so that the most significant bit
; to least significant bit rotation happens at the right boundary.
; CHECK: rol {{[abcd]x|si|di|bp|word ptr}},0x8
; ARM32-LABEL: test_bswap_16
; ARM32: rev
; ARM32: lsr {{.*}} #16
; MIPS32-LABEL: test_bswap_16
; MIPS32: sll {{.*}},0x8
; MIPS32: lui {{.*}},0xff
; MIPS32: and
; MIPS32: sll {{.*}},0x18
; MIPS32: or
; MIPS32: srl {{.*}},0x10
; MIPS32: andi {{.*}},0xffff

define internal i32 @test_bswap_32(i32 %x) {
entry:
  %r = call i32 @llvm.bswap.i32(i32 %x)
  ret i32 %r
}
; CHECK-LABEL: test_bswap_32
; CHECK: bswap e{{.*}}
; ARM32-LABEL: test_bswap_32
; ARM32: rev
; MIPS32-LABEL: test_bswap_32
; MIPS32: srl {{.*}},0x18
; MIPS32: srl {{.*}},0x8
; MIPS32: andi {{.*}},0xff00
; MIPS32: or
; MIPS32: sll {{.*}},0x8
; MIPS32: lui {{.*}},0xff
; MIPS32: and
; MIPS32: sll {{.*}},0x18
; MIPS32: or
; MIPS32: or

define internal i64 @test_bswap_64(i64 %x) {
entry:
  %r = call i64 @llvm.bswap.i64(i64 %x)
  ret i64 %r
}
; CHECK-LABEL: test_bswap_64
; CHECK: bswap e{{.*}}
; CHECK: bswap e{{.*}}
; ARM32-LABEL: test_bswap_64
; ARM32: rev
; ARM32: rev
; MIPS32-LABEL: test_bswap_64
; MIPS32: sll {{.*}},0x8
; MIPS32: srl {{.*}},0x18
; MIPS32: srl {{.*}},0x8
; MIPS32: andi {{.*}},0xff00
; MIPS32: lui {{.*}},0xff
; MIPS32: or
; MIPS32: and
; MIPS32: sll {{.*}},0x18
; MIPS32: or
; MIPS32: srl {{.*}},0x18
; MIPS32: srl {{.*}},0x8
; MIPS32: andi {{.*}},0xff00
; MIPS32: or
; MIPS32: or
; MIPS32: sll {{.*}},0x8
; MIPS32: and
; MIPS32: sll {{.*}},0x18
; MIPS32: or
; MIPS32: or

; Same as test_bswap_64 but with an undef operand.
define internal i64 @test_bswap_64_undef() {
entry:
  %r = call i64 @llvm.bswap.i64(i64 undef)
  ret i64 %r
}
; The ARM32 label names this function explicitly (it previously repeated the
; label of test_bswap_64 and only matched here as a substring).
; CHECK-LABEL: test_bswap_64_undef
; CHECK: bswap e{{.*}}
; CHECK: bswap e{{.*}}
; ARM32-LABEL: test_bswap_64_undef
; ARM32: rev
; ARM32: rev
; MIPS32-LABEL: test_bswap_64_undef
; MIPS32: sll {{.*}},0x8
; MIPS32: srl {{.*}},0x18
; MIPS32: srl {{.*}},0x8
; MIPS32: andi {{.*}},0xff00
; MIPS32: lui {{.*}},0xff
; MIPS32: or
; MIPS32: and
; MIPS32: sll {{.*}},0x18
; MIPS32: or
; MIPS32: srl {{.*}},0x18
; MIPS32: srl {{.*}},0x8
; MIPS32: andi {{.*}},0xff00
; MIPS32: or
; MIPS32: or
; MIPS32: sll {{.*}},0x8
; MIPS32: and
; MIPS32: sll {{.*}},0x18
; MIPS32: or
; MIPS32: or

define internal i32 @test_ctlz_32(i32 %x) {
entry:
  %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
  ret i32 %r
}
; CHECK-LABEL: test_ctlz_32
; TODO(jvoung): If we detect that LZCNT is supported, then use that
; and avoid the need to do the cmovne and xor stuff to guarantee that
; the result is well-defined w/ input == 0.
; CHECK: bsr [[REG_TMP:e.*]],{{.*}}
; CHECK: mov [[REG_RES:e.*]],0x3f
; CHECK: cmovne [[REG_RES]],[[REG_TMP]]
; CHECK: xor [[REG_RES]],0x1f
; ARM32-LABEL: test_ctlz_32
; ARM32: clz
; MIPS32-LABEL: test_ctlz_32
; MIPS32: clz

define internal i32 @test_ctlz_32_const() {
entry:
  %r = call i32 @llvm.ctlz.i32(i32 123456, i1 false)
  ret i32 %r
}
; Could potentially constant fold this, but the front-end should have done that.
; The dest operand must be a register and the source operand must be a register
; or memory.
; CHECK-LABEL: test_ctlz_32_const
; CHECK: bsr e{{.*}},{{.*}}e{{.*}}
; ARM32-LABEL: test_ctlz_32_const
; ARM32: clz
; MIPS32-LABEL: test_ctlz_32_const
; MIPS32: clz

; The ctlz result is unused, so no bsr is expected under the O2 removal prefix.
define internal i32 @test_ctlz_32_ignored(i32 %x) {
entry:
  %ignored = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
  ret i32 1
}
; CHECKO2REM-LABEL: test_ctlz_32_ignored
; CHECKO2REM-NOT: bsr

define internal i64 @test_ctlz_64(i64 %x) {
entry:
  %r = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
  ret i64 %r
}
; The O2-removal label directly below has no checks of its own; it ends the
; region searched by the preceding negative bsr check before this function,
; which is expected to contain bsr.
; CHECKO2REM-LABEL: test_ctlz_64
; CHECK-LABEL: test_ctlz_64
; CHECK: bsr [[REG_TMP1:e.*]],{{.*}}
; CHECK: mov [[REG_RES1:e.*]],0x3f
; CHECK: cmovne [[REG_RES1]],[[REG_TMP1]]
; CHECK: xor [[REG_RES1]],0x1f
; CHECK: add [[REG_RES1]],0x20
; CHECK: bsr [[REG_RES2:e.*]],{{.*}}
; CHECK: xor [[REG_RES2]],0x1f
; CHECK: test [[REG_UPPER:.*]],[[REG_UPPER]]
; CHECK: cmove [[REG_RES2]],[[REG_RES1]]
; CHECK: mov {{.*}},0x0
; ARM32-LABEL: test_ctlz_64
; ARM32: clz
; ARM32: cmp {{.*}}, #0
; ARM32: add {{.*}}, #32
; ARM32: clzne
; ARM32: mov {{.*}}, #0
; MIPS32-LABEL: test_ctlz_64
; MIPS32: clz
; MIPS32: clz
; MIPS32: addiu
; MIPS32: movn
; MIPS32: addiu

define internal i32 @test_ctlz_64_const(i64 %x) {
entry:
  %r = call i64 @llvm.ctlz.i64(i64 123456789012, i1 false)
  %r2 = trunc i64 %r to i32
  ret i32 %r2
}
; The ARM32 label names this function explicitly (it previously repeated the
; label of test_ctlz_64 and only matched here as a substring).
; CHECK-LABEL: test_ctlz_64_const
; CHECK: bsr e{{.*}},{{.*}}e{{.*}}
; CHECK: bsr e{{.*}},{{.*}}e{{.*}}
; ARM32-LABEL: test_ctlz_64_const
; ARM32: clz
; ARM32: clzne
; MIPS32-LABEL: test_ctlz_64_const
; MIPS32: clz
; MIPS32: clz
; MIPS32: addiu
; MIPS32: movn
; MIPS32: addiu

define internal i32 @test_ctlz_64_ignored(i64 %x) {
entry:
  %ignored = call i64 @llvm.ctlz.i64(i64 1234567890, i1 false)
  ret i32 2
}
; CHECKO2REM-LABEL: test_ctlz_64_ignored
; CHECKO2REM-NOT: bsr

define internal i32 @test_cttz_32(i32 %x) {
entry:
  %r = call i32 @llvm.cttz.i32(i32 %x, i1 false)
  ret i32 %r
}
; CHECK-LABEL: test_cttz_32
; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}}
; CHECK: mov [[REG_IF_ZERO:e.*]],0x20
; CHECK: cmovne [[REG_IF_ZERO]],[[REG_IF_NOTZERO]]
; ARM32-LABEL: test_cttz_32
; ARM32: rbit
; ARM32: clz
; MIPS32-LABEL: test_cttz_32
; MIPS32: addiu
; MIPS32: nor
; MIPS32: and
; MIPS32: clz
; MIPS32: li
; MIPS32: subu

define internal i64 @test_cttz_64(i64 %x) {
entry:
  %r = call i64 @llvm.cttz.i64(i64 %x, i1 false)
  ret i64 %r
}
; CHECK-LABEL: test_cttz_64
; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}}
; CHECK: mov [[REG_RES1:e.*]],0x20
; CHECK: cmovne [[REG_RES1]],[[REG_IF_NOTZERO]]
; CHECK: add [[REG_RES1]],0x20
; CHECK: bsf [[REG_RES2:e.*]],[[REG_LOWER:.*]]
; CHECK: test [[REG_LOWER]],[[REG_LOWER]]
; CHECK: cmove [[REG_RES2]],[[REG_RES1]]
; CHECK: mov {{.*}},0x0
; ARM32-LABEL: test_cttz_64
; ARM32: rbit
; ARM32: rbit
; ARM32: clz
; ARM32: cmp {{.*}}, #0
; ARM32: add {{.*}}, #32
; ARM32: clzne
; ARM32: mov {{.*}}, #0
; MIPS32-LABEL: test_cttz_64
; MIPS32: addiu
; MIPS32: nor
; MIPS32: and
; MIPS32: clz
; MIPS32: li
; MIPS32: subu
; MIPS32: addiu
; MIPS32: nor
; MIPS32: and
; MIPS32: clz
; MIPS32: li
; MIPS32: subu

define internal i32 @test_popcount_32(i32 %x) {
entry:
  %r = call i32 @llvm.ctpop.i32(i32 %x)
  ret i32 %r
}
; CHECK-LABEL: test_popcount_32
; CHECK: call {{.*}} R_{{.*}} __popcountsi2
; ARM32-LABEL: test_popcount_32
; ARM32: bl {{.*}} __popcountsi2
; MIPS32-LABEL: test_popcount_32
; MIPS32: jal {{.*}} __popcountsi2

define internal i64 @test_popcount_64(i64 %x) {
entry:
  %r = call i64 @llvm.ctpop.i64(i64 %x)
  ret i64 %r
}
; CHECK-LABEL: test_popcount_64
; CHECK: call {{.*}} R_{{.*}} __popcountdi2
; __popcountdi2 only returns a 32-bit result, so clear the upper bits of
; the return value just in case.
; CHECK: mov {{.*}},0x0
; ARM32-LABEL: test_popcount_64
; ARM32: bl {{.*}} __popcountdi2
; ARM32: mov {{.*}}, #0
; MIPS32-LABEL: test_popcount_64
; MIPS32: jal {{.*}} __popcountdi2

define internal i32 @test_popcount_64_ret_i32(i64 %x) {
entry:
  %r_i64 = call i64 @llvm.ctpop.i64(i64 %x)
  %r = trunc i64 %r_i64 to i32
  ret i32 %r
}
; If there is a trunc, then the mov {{.*}},0x0 is dead and gets optimized out.
; The negative pattern is spelled the way the disassembly prints the
; instruction in test_popcount_64 (no space after the comma, hex immediate);
; the earlier ", 0" spelling could never match and passed vacuously.
; CHECKO2REM-LABEL: test_popcount_64_ret_i32
; CHECKO2REM: call {{.*}} R_{{.*}} __popcountdi2
; CHECKO2REM-NOT: mov {{.*}},0x0
; MIPS32-LABEL: test_popcount_64_ret_i32
; MIPS32: jal {{.*}} __popcountdi2
; MIPS32: sw v0,{{.*}}
; MIPS32: sw v1,{{.*}}
; MIPS32: lw v0,{{.*}}
; MIPS32: lw ra,{{.*}}

define internal void @test_stacksave_noalloca() {
entry:
  %sp = call i8* @llvm.stacksave()
  call void @llvm.stackrestore(i8* %sp)
  ret void
}
; The O2-removal label directly below has no checks of its own; it ends the
; region searched by the negative mov check of test_popcount_64_ret_i32 so
; that check does not extend over the rest of the file.
; CHECKO2REM-LABEL: test_stacksave_noalloca
; CHECK-LABEL: test_stacksave_noalloca
; CHECK: mov {{.*}},esp
; CHECK: mov esp,{{.*}}
; ARM32-LABEL: test_stacksave_noalloca
; ARM32: mov {{.*}}, sp
; ARM32: mov sp, {{.*}}
; MIPS32-LABEL: test_stacksave_noalloca
; MIPS32: sw sp,{{.*}}
; MIPS32: lw [[REG:.*]],0(sp)
; MIPS32: move sp,[[REG]]

declare i32 @foo(i32 %x)

; Three stacksave calls interleaved with variable-sized allocas and a call;
; only the first saved stack pointer is restored.
define internal void @test_stacksave_multiple(i32 %x) {
entry:
  %x_4 = mul i32 %x, 4
  %sp1 = call i8* @llvm.stacksave()
  %tmp1 = alloca i8, i32 %x_4, align 4

  %sp2 = call i8* @llvm.stacksave()
  %tmp2 = alloca i8, i32 %x_4, align 4

  %y = call i32 @foo(i32 %x)

  %sp3 = call i8* @llvm.stacksave()
  %tmp3 = alloca i8, i32 %x_4, align 4

  %__9 = bitcast i8* %tmp1 to i32*
  store i32 %y, i32* %__9, align 1

  %__10 = bitcast i8* %tmp2 to i32*
  store i32 %x, i32* %__10, align 1

  %__11 = bitcast i8* %tmp3 to i32*
  store i32 %x, i32* %__11, align 1

  call void @llvm.stackrestore(i8* %sp1)
  ret void
}
; CHECK-LABEL: test_stacksave_multiple
; lea is used to copy from esp for the allocas.
; Otherwise, only one stacksave is live.
; CHECK: mov ebp,esp
; CHECK: mov {{.*}},esp
; CHECK: lea {{.*}},[esp+0x10]
; CHECK: lea {{.*}},[esp+0x10]
; CHECK: call
; CHECK: mov esp,{{.*}}
; CHECK: mov esp,ebp
; ARM32-LABEL: test_stacksave_multiple
; ARM32: mov {{.*}}, sp
; ARM32: mov {{.*}}, sp
; ARM32: mov {{.*}}, sp
; ARM32: mov sp, {{.*}}
; MIPS32-LABEL: test_stacksave_multiple
; MIPS32: sw sp,[[MEMLOC:.*]]
; MIPS32: sw sp,{{.*}}
; MIPS32: sw sp,{{.*}}
; MIPS32: lw [[REG:.*]],[[MEMLOC]]
; MIPS32: move sp,[[REG]]