; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort=1 | FileCheck %s
; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -pass-remarks-missed=isel 2>&1 >/dev/null | FileCheck %s --check-prefix=STDERR --allow-empty
; RUN: llc < %s -mattr=+avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort=1 | FileCheck %s --check-prefix=AVX

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"

; Make sure that fast-isel folds the immediate into the binop even though the
; operand order is non-canonical (the constant is the left-hand operand).
define i32 @test1(i32 %i) nounwind ssp {
  ; Returns 8 & %i; the constant is written as the first operand on purpose.
  %masked = and i32 8, %i
  ret i32 %masked
}

; CHECK-LABEL: test1:
; CHECK: andl	$8,


; rdar://9289512 - The load should fold into the compare.
define void @test2(i64 %x) nounwind ssp {
start:
  ; Spill the argument to a stack slot, reload it, and compare the reloaded
  ; value against 42 (signed greater-than).
  %slot = alloca i64, align 8
  store i64 %x, i64* %slot, align 8
  %reloaded = load i64, i64* %slot, align 8
  %is.gt = icmp sgt i64 %reloaded, 42
  br i1 %is.gt, label %taken, label %done

taken:                                            ; preds = %start
  ; Empty arm: falls straight through to the join block.
  br label %done

done:                                             ; preds = %taken, %start
  ret void
; The expected output compares directly against the stack slot, with no
; separate reload instruction.
; CHECK-LABEL: test2:
; CHECK: movq	%rdi, -8(%rsp)
; CHECK: cmpq	$42, -8(%rsp)
}


@G = external global i32
define i64 @test3() nounwind {
  ; Returns the address of the external global @G as an integer.
  %addr = ptrtoint i32* @G to i64
  ret i64 %addr
; Expected: a single GOT-relative load straight into the return register.
; CHECK-LABEL: test3:
; CHECK: movq _G@GOTPCREL(%rip), %rax
; CHECK-NEXT: ret
}


; rdar://9289558 - The indexed byte load and its zero-extension should select
; to a single movzbl.
@rtx_length = external global [153 x i8]

define i32 @test4(i64 %idxprom9) nounwind {
  ; Index into the external byte array @rtx_length, load one byte, and
  ; zero-extend it to i32.
  %elt.ptr = getelementptr inbounds [153 x i8], [153 x i8]* @rtx_length, i32 0, i64 %idxprom9
  %byte = load i8, i8* %elt.ptr, align 1
  %wide = zext i8 %byte to i32
  ret i32 %wide

; Expected: load the array base from the GOT, then one movzbl using
; base+index addressing.
; CHECK-LABEL: test4:
; CHECK: movq	_rtx_length@GOTPCREL(%rip), %rax
; CHECK-NEXT: movzbl	(%rax,%rdi), %eax
; CHECK-NEXT: ret
}


; PR3242 - Out of range shifts should not be folded by fastisel.
define void @test5(i32 %x, i32* %p) nounwind {
  ; Arithmetic shift right by 50000, far beyond the 32-bit width, then store
  ; the result through %p.
  %shifted = ashr i32 %x, 50000
  store i32 %shifted, i32* %p
  ret void

; Expected: the shift amount is materialized in %ecx and the shift uses %cl,
; rather than an immediate-form shift.
; CHECK-LABEL: test5:
; CHECK: movl	$50000, %ecx
; CHECK: sarl	%cl, %edi
; CHECK: ret
}

; rdar://9289501 - fast isel should fold trivial multiplies to shifts.
define i64 @test6(i64 %x) nounwind ssp {
start:
  ; 64-bit multiply by 8 (a power of two).
  %scaled = mul nsw i64 %x, 8
  ret i64 %scaled

; Expected: a left shift by 3 on a 64-bit register.
; CHECK-LABEL: test6:
; CHECK: shlq	$3, {{%r[a-z]+}}
}

define i32 @test7(i32 %x) nounwind ssp {
start:
  ; 32-bit multiply by 8 (a power of two).
  %scaled = mul nsw i32 %x, 8
  ret i32 %scaled
; Expected: a left shift by 3 on a 32-bit register.
; CHECK-LABEL: test7:
; CHECK: shll	$3, {{%e[a-z]+}}
}


; rdar://9289507 - folding of immediates into 64-bit operations.
define i64 @test8(i64 %x) nounwind ssp {
start:
  ; 64-bit add of the small constant 7.
  %sum = add nsw i64 %x, 7
  ret i64 %sum

; Expected: the constant appears as an immediate operand of addq.
; CHECK-LABEL: test8:
; CHECK: addq	$7, {{%r[a-z]+}}
}

define i64 @test9(i64 %x) nounwind ssp {
entry:
  ; 64-bit multiply by 7, which is not a power of two, so this cannot become a
  ; shift. The result was previously named %add although it holds a product;
  ; it is named %mul to match the instruction and the sibling tests.
  %mul = mul nsw i64 %x, 7
  ret i64 %mul
; Expected: the constant appears as an immediate operand of a
; three-operand imulq.
; CHECK-LABEL: test9:
; CHECK: imulq	$7, %rdi, %rax
}

; rdar://9297011 - Don't reject udiv by a power of 2.
define i32 @test10(i32 %X) nounwind {
  ; Unsigned divide by 8 (a power of two).
  %quot = udiv i32 %X, 8
  ret i32 %quot
; Expected: a logical right shift by 3.
; CHECK-LABEL: test10:
; CHECK: shrl	$3,
}

define i32 @test11(i32 %X) nounwind {
  ; Signed divide by 8, flagged 'exact'.
  %quot = sdiv exact i32 %X, 8
  ret i32 %quot
; Expected: an arithmetic right shift by 3.
; CHECK-LABEL: test11:
; CHECK: sarl	$3,
}


; rdar://9297006 - Trunc to bool.
define void @test12(i8 %tmp) nounwind ssp noredzone {
start:
  ; Truncate the i8 argument to i1 and branch on it.
  %flag = trunc i8 %tmp to i1
  br i1 %flag, label %recurse, label %done

recurse:                                          ; preds = %start
  ; Self-call with a constant zero argument.
  call void @test12(i8 0) noredzone
  br label %done

done:                                             ; preds = %recurse, %start
  ret void
; Expected: test the low bit, branch around the call when it is clear, and
; zero %edi immediately before the call.
; CHECK-LABEL: test12:
; CHECK: testb	$1,
; CHECK-NEXT: je L
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq
}

declare void @test13f(i1 %X)

define void @test13() nounwind {
  ; Calls @test13f with a constant i1 0 argument.
  call void @test13f(i1 0)
  ret void
; Expected: %edi is zeroed with xorl immediately before the call.
; CHECK-LABEL: test13:
; CHECK: xorl %edi, %edi
; CHECK-NEXT: callq
}


; rdar://9297003 - fast isel bails out on all functions taking bools
define void @test14(i8 %tmp) nounwind ssp noredzone {
start:
  ; Truncate the i8 argument to i1 and forward it as a zeroext i1 argument.
  %flag = trunc i8 %tmp to i1
  call void @test13f(i1 zeroext %flag) noredzone
  ret void
; Expected: the value is masked down to its low bit before the call.
; CHECK-LABEL: test14:
; CHECK: andb	$1,
; CHECK: callq
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

; rdar://9289488 - fast-isel shouldn't bail out on llvm.memcpy
define void @test15(i8* %a, i8* %b) nounwind {
  ; 4-byte llvm.memcpy from %b to %a; both pointers are marked align 4 and the
  ; trailing i1 flag is false.
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 %b, i64 4, i1 false)
  ret void
; Expected: the copy is expanded inline as one 32-bit load and one 32-bit
; store, with nothing else before the return.
; CHECK-LABEL: test15:
; CHECK-NEXT: movl	(%rsi), %eax
; CHECK-NEXT: movl	%eax, (%rdi)
; CHECK-NEXT: ret
}

; Handling for varargs calls
declare void @test16callee(...) nounwind
define void @test16() nounwind {
; First variadic call passes a single i32; the expected output sets %al to 0
; before the call.
; CHECK-LABEL: test16:
; CHECK: movl $1, %edi
; CHECK: movb $0, %al
; CHECK: callq _test16callee
  call void (...) @test16callee(i32 1)
  br label %block2

block2:
; Second variadic call passes a single double; the expected output loads the
; constant into %xmm0 from the constant pool and sets %al to 1.
; CHECK: movsd LCP{{.*}}_{{.*}}(%rip), %xmm0
; CHECK: movb $1, %al
; CHECK: callq _test16callee

; Same expectations for the +avx run, using the VEX-encoded load.
; AVX: vmovsd LCP{{.*}}_{{.*}}(%rip), %xmm0
; AVX: movb $1, %al
; AVX: callq _test16callee
  call void (...) @test16callee(double 1.000000e+00)
  ret void
}


declare void @foo() unnamed_addr ssp align 2

; Verify that we don't fold the load into the compare here.  Folding would
; move the load across the call to @foo.
define i32 @test17(i32 *%P) ssp nounwind {
start:
  ; The load and compare both precede the call in the IR; the branch that
  ; consumes the compare comes after it.
  %val = load i32, i32* %P
  %differs = icmp ne i32 %val, 5
  call void @foo()
  br i1 %differs, label %ne.five, label %eq.five

ne.five:                                          ; preds = %start
  ret i32 1

eq.five:                                          ; preds = %start
  ret i32 2
; Expected: the load stays ahead of the call as its own instruction, and the
; compare after the call uses the loaded register rather than memory.
; CHECK-LABEL: test17:
; CHECK: movl	(%rdi), %eax
; CHECK: callq _foo
; CHECK: cmpl	$5, %eax
; CHECK-NEXT: je
}

; Check that 0.0 is materialized using xorps
define void @test18(float* %p1) {
  ; Stores the float constant 0.0 through %p1.
  store float 0.0, float* %p1
  ret void
; Expected: an xorps appears somewhere in the function body.
; CHECK-LABEL: test18:
; CHECK: xorps
}

; Without any type hints, doubles use the smaller xorps instead of xorpd.
define void @test19(double* %p1) {
  ; Stores the double constant 0.0 through %p1.
  store double 0.0, double* %p1
  ret void
; Expected: xorps is matched here too, even though the value is a double.
; CHECK-LABEL: test19:
; CHECK: xorps
}

; Check that we fast-isel sret
%struct.a = type { i64, i64, i64 }
define void @test20() nounwind ssp {
start:
  ; Allocate a %struct.a on the stack and pass its address as the sret
  ; argument of @test20sret.
  %agg = alloca %struct.a, align 8
  call void @test20sret(%struct.a* sret(%struct.a) %agg)
  ret void
; Expected: the stack pointer is copied into %rdi as the sret argument before
; the call.
; CHECK-LABEL: test20:
; CHECK: movq %rsp, %rdi
; CHECK: callq _test20sret
}
declare void @test20sret(%struct.a* sret(%struct.a))

; Check that -0.0 is not materialized using xor
define void @test21(double* %p1) {
  ; Stores the double constant -0.0 (sign bit set) through %p1.
  store double -0.0, double* %p1
  ret void
; Expected: no xor-family instruction before the constant-pool load that
; materializes the value.
; CHECK-LABEL: test21:
; CHECK-NOT: xor
; CHECK: movsd	LCPI
}

; Check that immediate arguments to a function
; do not cause massive spilling and are used
; as immediates just before the call.
define void @test22() nounwind {
entry:
  ; Four back-to-back calls to @foo22 with the constants 0, 1, 2 and 3.
  call void @foo22(i32 0)
  call void @foo22(i32 1)
  call void @foo22(i32 2)
  call void @foo22(i32 3)
  ret void
; Expected: each constant is written into %edi directly ahead of its own call
; (xorl for zero, movl with an immediate for the others).
; CHECK-LABEL: test22:
; CHECK: xorl	%edi, %edi
; CHECK: callq	_foo22
; CHECK: movl	$1, %edi
; CHECK: callq	_foo22
; CHECK: movl	$2, %edi
; CHECK: callq	_foo22
; CHECK: movl	$3, %edi
; CHECK: callq	_foo22
}

declare void @foo22(i32)

; PR13563
define void @test23(i8* noalias sret(i8) %result) {
  ; Takes an sret pointer, allocates an i8 that is never used, and makes a
  ; call whose result is ignored.
  %unused.slot = alloca i8
  %ignored = call i8* @foo23()
  ret void
; Expected: the incoming %rdi is saved to a stack slot before the call and
; reloaded from that same slot into %rax right after it.
; CHECK-LABEL: test23:
; CHECK: movq %rdi, [[STACK:[0-9]+\(%rsp\)]]
; CHECK: call
; CHECK-NEXT: movq [[STACK]], %rax
; CHECK-NEXT: addq $24, %rsp
; CHECK: ret
}

declare i8* @foo23()

declare void @takesi32ptr(i32* %arg)

; Passes the address of a stack allocation directly as a call argument.
; The label directive carries a trailing ':' like every other test in this
; file, so it anchors on the label definition rather than on any mention of
; the symbol name.
; CHECK-LABEL: allocamaterialize:
define void @allocamaterialize() {
  %a = alloca i32
; Expected: the alloca's address is formed with leaq straight into %rdi.
; CHECK: leaq {{.*}}, %rdi
  call void @takesi32ptr(i32* %a)
  ret void
}

; A win64cc function whose body is only a return. The remarks run asserts that
; no missed-terminator remark is printed for that return. The label directive
; carries a trailing ':' like the other tests in this file, so it anchors on
; the label definition rather than on any mention of the symbol name.
; STDERR-NOT: FastISel missed terminator:   ret void
; CHECK-LABEL: win64ccfun:
define win64cc void @win64ccfun(i32 %i) {
; CHECK: ret
  ret void
}