; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -verify-machineinstrs | FileCheck %s

; After tail duplication, two copies in an early exit BB can be cancelled out.
; rdar://10640363
;
; @t1 is a remainder loop over i32: each iteration replaces (a, b) with
; (b, a srem b) and leaves the loop once the remainder is zero; if b is zero
; on entry the loop is skipped and a is returned unchanged.
; In the expected output the b == 0 path (je LBB0_4) reaches retq without any
; further copy: the entry block has already placed %edi in %eax.
define i32 @t1(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: t1:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    testl %esi, %esi
; CHECK-NEXT:    je LBB0_4
; CHECK-NEXT:  ## %bb.1: ## %while.body.preheader
; CHECK-NEXT:    movl %esi, %edx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB0_2: ## %while.body
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl %edx, %ecx
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %ecx
; CHECK-NEXT:    testl %edx, %edx
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    jne LBB0_2
; CHECK-NEXT:  ## %bb.3: ## %while.end
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:  LBB0_4:
; CHECK-NEXT:    retq
entry:
  ; Early exit: a zero divisor skips the loop and returns %a.
  %cmp1 = icmp eq i32 %b, 0
  br i1 %cmp1, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  ; Loop-carried pair: the old divisor becomes the next dividend and the
  ; remainder becomes the next divisor.
  %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
  %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
  %rem = srem i32 %a.addr.03, %b.addr.02
  %cmp = icmp eq i32 %rem, 0
  br i1 %cmp, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ; Result is %a on the early-exit edge, otherwise the last divisor.
  %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
  ret i32 %a.addr.0.lcssa
}

; Two movdqa (from phi-elimination) in the entry BB cancel out.
; rdar://10428165
;
; @t2 shuffles two <8 x i16> inputs. Only result lanes 2, 3 and 4 are defined:
; they take %T0[7], %T0[2] and %T1[0] respectively (mask index 8 is the first
; lane of the second operand). The expected output is three shuffle
; instructions followed by retq, with no register-to-register vector copy.
define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; CHECK-LABEL: t2:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
entry:
  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
  ret <8 x i16> %tmp8
}

; @t3 is the same remainder loop as a 64-bit computation: each iteration
; replaces (a, b) with (b, a srem b) until the remainder is zero. The i64
; result is truncated to i32 before returning, so the expected copy in
; %while.end is a 32-bit movl of the loop's 64-bit register.
define i32 @t3(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: t3:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    testq %rsi, %rsi
; CHECK-NEXT:    je LBB2_4
; CHECK-NEXT:  ## %bb.1: ## %while.body.preheader
; CHECK-NEXT:    movq %rsi, %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB2_2: ## %while.body
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    cqto
; CHECK-NEXT:    idivq %rcx
; CHECK-NEXT:    testq %rdx, %rdx
; CHECK-NEXT:    movq %rcx, %rax
; CHECK-NEXT:    jne LBB2_2
; CHECK-NEXT:  ## %bb.3: ## %while.end
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:  LBB2_4:
; CHECK-NEXT:    retq
entry:
  ; Early exit: a zero divisor skips the loop.
  %cmp1 = icmp eq i64 %b, 0
  br i1 %cmp1, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  ; Loop-carried pair: the old divisor becomes the next dividend and the
  ; remainder becomes the next divisor.
  %a.addr.03 = phi i64 [ %b.addr.02, %while.body ], [ %a, %entry ]
  %b.addr.02 = phi i64 [ %rem, %while.body ], [ %b, %entry ]
  %rem = srem i64 %a.addr.03, %b.addr.02
  %cmp = icmp eq i64 %rem, 0
  br i1 %cmp, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ; Only the low 32 bits of the selected value are returned.
  %a.addr.0.lcssa = phi i64 [ %a, %entry ], [ %b.addr.02, %while.body ]
  %t = trunc i64 %a.addr.0.lcssa to i32
  ret i32 %t
}

; Check that copy propagation does not kill things like:
; dst = copy src <-- do not kill that.
; ... = op1 undef dst
; ... = op2 dst <-- this is used here.
; @foo applies a chain of compare / convert / bitwise operations to a
; <16 x float> argument and returns a <16 x float>.
; The expected output begins by copying %xmm3, %xmm2 and %xmm0 into %xmm9,
; %xmm8 and %xmm7; each of those copies is read again further down (by
; cvttps2dq, cmpltps or movaps), so the assertions fail if any of them is
; removed.
define <16 x float> @foo(<16 x float> %x) {
; CHECK-LABEL: foo:
; CHECK:       ## %bb.0: ## %bb
; CHECK-NEXT:    movaps %xmm3, %xmm9
; CHECK-NEXT:    movaps %xmm2, %xmm8
; CHECK-NEXT:    movaps %xmm0, %xmm7
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    movaps %xmm3, %xmm2
; CHECK-NEXT:    cmpltps %xmm0, %xmm2
; CHECK-NEXT:    movaps %xmm2, %xmm4
; CHECK-NEXT:    orps {{.*}}(%rip), %xmm4
; CHECK-NEXT:    movaps %xmm4, %xmm10
; CHECK-NEXT:    andnps %xmm2, %xmm10
; CHECK-NEXT:    movaps %xmm8, %xmm5
; CHECK-NEXT:    cmpltps %xmm0, %xmm5
; CHECK-NEXT:    movaps {{.*#+}} xmm11 = [9,10,11,12]
; CHECK-NEXT:    movaps %xmm5, %xmm2
; CHECK-NEXT:    orps %xmm11, %xmm2
; CHECK-NEXT:    movaps %xmm2, %xmm14
; CHECK-NEXT:    andnps %xmm5, %xmm14
; CHECK-NEXT:    cvttps2dq %xmm1, %xmm12
; CHECK-NEXT:    cmpltps %xmm0, %xmm1
; CHECK-NEXT:    movaps {{.*#+}} xmm13 = [5,6,7,8]
; CHECK-NEXT:    movaps %xmm1, %xmm6
; CHECK-NEXT:    orps %xmm13, %xmm6
; CHECK-NEXT:    movaps %xmm6, %xmm5
; CHECK-NEXT:    andnps %xmm1, %xmm5
; CHECK-NEXT:    cvttps2dq %xmm7, %xmm3
; CHECK-NEXT:    cmpltps %xmm0, %xmm7
; CHECK-NEXT:    movaps {{.*#+}} xmm15 = [1,2,3,4]
; CHECK-NEXT:    movaps %xmm7, %xmm0
; CHECK-NEXT:    orps %xmm15, %xmm0
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    andnps %xmm7, %xmm1
; CHECK-NEXT:    andps %xmm15, %xmm0
; CHECK-NEXT:    cvtdq2ps %xmm3, %xmm3
; CHECK-NEXT:    andps %xmm3, %xmm0
; CHECK-NEXT:    movaps {{.*#+}} xmm3 = [1,1,1,1]
; CHECK-NEXT:    andps %xmm3, %xmm1
; CHECK-NEXT:    orps %xmm1, %xmm0
; CHECK-NEXT:    andps %xmm13, %xmm6
; CHECK-NEXT:    cvtdq2ps %xmm12, %xmm1
; CHECK-NEXT:    andps %xmm1, %xmm6
; CHECK-NEXT:    andps %xmm3, %xmm5
; CHECK-NEXT:    orps %xmm5, %xmm6
; CHECK-NEXT:    andps %xmm11, %xmm2
; CHECK-NEXT:    cvttps2dq %xmm8, %xmm1
; CHECK-NEXT:    cvtdq2ps %xmm1, %xmm1
; CHECK-NEXT:    andps %xmm1, %xmm2
; CHECK-NEXT:    andps %xmm3, %xmm14
; CHECK-NEXT:    orps %xmm14, %xmm2
; CHECK-NEXT:    andps %xmm3, %xmm10
; CHECK-NEXT:    andps {{.*}}(%rip), %xmm4
; CHECK-NEXT:    cvttps2dq %xmm9, %xmm1
; CHECK-NEXT:    cvtdq2ps %xmm1, %xmm1
; CHECK-NEXT:    andps %xmm1, %xmm4
; CHECK-NEXT:    orps %xmm10, %xmm4
; CHECK-NEXT:    movaps %xmm6, %xmm1
; CHECK-NEXT:    movaps %xmm4, %xmm3
; CHECK-NEXT:    retq
bb:
  ; %v3 compares the constant vector 0..15 against zero, so it does not
  ; depend on %x.
  %v3 = icmp slt <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, zeroinitializer
  %v14 = zext <16 x i1> %v3 to <16 x i32>
  ; Lane mask for x < 0.0, used both sign-extended (%v17) and zero-extended (%v18).
  %v16 = fcmp olt <16 x float> %x, zeroinitializer
  %v17 = sext <16 x i1> %v16 to <16 x i32>
  %v18 = zext <16 x i1> %v16 to <16 x i32>
  %v19 = xor <16 x i32> %v14, %v18
  %v20 = or <16 x i32> %v17, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  ; Round-trip %x through i32 (fptosi then sitofp) and reuse its bit pattern.
  %v21 = fptosi <16 x float> %x to <16 x i32>
  %v22 = sitofp <16 x i32> %v21 to <16 x float>
  %v69 = fcmp ogt <16 x float> %v22, zeroinitializer
  %v75 = and <16 x i1> %v69, %v3
  %v77 = bitcast <16 x float> %v22 to <16 x i32>
  %v79 = sext <16 x i1> %v75 to <16 x i32>
  %v80 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v79
  %v81 = xor <16 x i32> %v77, %v80
  %v82 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v81
  ; Blend of %v19 and %v82 under mask %v20, written as xor / and / xor.
  %v83 = xor <16 x i32> %v19, %v82
  %v84 = and <16 x i32> %v83, %v20
  %v85 = xor <16 x i32> %v19, %v84
  %v86 = bitcast <16 x i32> %v85 to <16 x float>
  ret <16 x float> %v86
}