• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc -mtriple=x86_64-apple-macosx -mattr=+sse2 -verify-machineinstrs < %s | FileCheck %s
2
3; After tail duplication, two copies in an early exit BB can be cancelled out.
4; rdar://10640363
5define i32 @t1(i32 %a, i32 %b) nounwind  {  ; i32 remainder loop with an early exit taken when %b is zero
6entry:
7; CHECK-LABEL: t1:
8; CHECK: je [[LABEL:.*BB.*]]
9  %cmp1 = icmp eq i32 %b, 0  ; %b == 0 bypasses the loop and goes straight to while.end
10  br i1 %cmp1, label %while.end, label %while.body  ; the directives below expect no mov between the je target label and ret
11
12; CHECK: [[LABEL]]:
13; CHECK-NOT: mov
14; CHECK: ret
15
16while.body:                                       ; preds = %entry, %while.body
17  %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]  ; dividend: %a on entry, the previous divisor on later iterations
18  %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]  ; divisor: %b on entry, the previous remainder on later iterations
19  %rem = srem i32 %a.addr.03, %b.addr.02  ; signed remainder of dividend by divisor
20  %cmp = icmp eq i32 %rem, 0  ; the loop stops once the remainder is zero
21  br i1 %cmp, label %while.end, label %while.body
22
23while.end:                                        ; preds = %while.body, %entry
24  %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]  ; %a on the early exit, otherwise the last divisor
25  ret i32 %a.addr.0.lcssa
26}
27
28; Two movdqa (from phi-elimination) in the entry BB cancel out.
29; rdar://10428165
30define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {  ; one two-input shuffle; the directive below expects no movdqa in the output
31entry:
32; CHECK-LABEL: t2:
33; CHECK-NOT: movdqa
34  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >  ; only result lanes 2, 3, 4 are defined: %T0[7], %T0[2], %T1[0]
35  ret <8 x i16> %tmp8
36}
37
38define i32 @t3(i64 %a, i64 %b) nounwind  {  ; i64 remainder loop with an early exit on %b == 0; the result is truncated to i32
39entry:
40; CHECK-LABEL: t3:
41; CHECK: je [[LABEL:.*BB.*]]
42  %cmp1 = icmp eq i64 %b, 0  ; %b == 0 bypasses the loop and goes straight to while.end
43  br i1 %cmp1, label %while.end, label %while.body  ; the directives below expect no mov between the je target label and ret
44
45; CHECK: [[LABEL]]:
46; CHECK-NOT: mov
47; CHECK: ret
48
49while.body:                                       ; preds = %entry, %while.body
50  %a.addr.03 = phi i64 [ %b.addr.02, %while.body ], [ %a, %entry ]  ; dividend: %a on entry, the previous divisor on later iterations
51  %b.addr.02 = phi i64 [ %rem, %while.body ], [ %b, %entry ]  ; divisor: %b on entry, the previous remainder on later iterations
52  %rem = srem i64 %a.addr.03, %b.addr.02  ; signed remainder of dividend by divisor
53  %cmp = icmp eq i64 %rem, 0  ; the loop stops once the remainder is zero
54  br i1 %cmp, label %while.end, label %while.body
55
56while.end:                                        ; preds = %while.body, %entry
57  %a.addr.0.lcssa = phi i64 [ %a, %entry ], [ %b.addr.02, %while.body ]  ; %a on the early exit, otherwise the last divisor
58  %t = trunc i64 %a.addr.0.lcssa to i32  ; only the low 32 bits are returned
59  ret i32 %t
60}
61
62; Check that copy propagation does not kill things like:
63; dst = copy src <-- do not kill that.
64; ... = op1 dst<undef>
65; ... = op2 dst <-- this is used here.
66;
67; CHECK-LABEL: foo:
68; CHECK: psllw $7,
69; CHECK: psllw $7, [[SRC1:%xmm[0-9]+]]
70; CHECK-NEXT: pand {{.*}}(%rip), [[SRC1]]
71; CHECK-NEXT: pcmpgtb [[SRC1]], [[SRC2:%xmm[0-9]+]]
72; CHECK-NEXT: pand %xmm{{[0-9]+}}, [[SRC2]]
73; CHECK-NEXT: movdqa [[SRC2]], [[CPY1:%xmm[0-9]+]]
74; CHECK-NEXT: punpcklbw %xmm{{[0-9]+}}, [[CPY1]]
75; Check that CPY1 is not redefined.
76; CHECK-NOT:  , [[CPY1]]
77; CHECK: punpckhwd %xmm{{[0-9]+}}, [[CPY1]]
78; CHECK-NEXT: pslld $31, [[CPY1]]
79; CHECK-NEXT: psrad $31, [[CPY1]]
80; CHECK: punpckhbw %xmm{{[0-9]+}}, [[CPY2:%xmm[0-9]+]]
81; Check that CPY2 is not redefined.
82; CHECK-NOT:  , [[CPY2]]
83; CHECK: punpckhwd %xmm{{[0-9]+}}, [[CPY2]]
84; CHECK-NEXT: pslld $31, [[CPY2]]
85; CHECK-NEXT: psrad $31, [[CPY2]]
86define <16 x float> @foo(<16 x float> %x) {  ; input for the FileCheck directives placed just before this function; several operands are undef (presumably left by test reduction)
87bb:
88  %v3 = icmp slt <16 x i32> undef, zeroinitializer  ; lane mask computed from an undef vector
89  %v14 = zext <16 x i1> %v3 to <16 x i32>  ; 0/1 per lane from %v3
90  %v16 = fcmp olt <16 x float> %x, zeroinitializer  ; ordered per-lane test %x < 0.0
91  %v17 = sext <16 x i1> %v16 to <16 x i32>  ; 0/-1 per lane from %v16
92  %v18 = zext <16 x i1> %v16 to <16 x i32>  ; 0/1 per lane from %v16
93  %v19 = xor <16 x i32> %v14, %v18  ; reused twice below (%v83 and %v85)
94  %v20 = or <16 x i32> %v17, undef
95  %v21 = fptosi <16 x float> %x to <16 x i32>  ; %x to signed i32 ...
96  %v22 = sitofp <16 x i32> %v21 to <16 x float>  ; ... and back to float
97  %v69 = fcmp ogt <16 x float> %v22, zeroinitializer  ; ordered per-lane test %v22 > 0.0
98  %v75 = and <16 x i1> %v69, %v3  ; combine both lane masks
99  %v77 = bitcast <16 x float> %v22 to <16 x i32>  ; reinterpret the converted floats as integer bits
100  %v79 = sext <16 x i1> %v75 to <16 x i32>  ; 0/-1 per lane from %v75
101  %v80 = and <16 x i32> undef, %v79
102  %v81 = xor <16 x i32> %v77, %v80
103  %v82 = and <16 x i32> undef, %v81
104  %v83 = xor <16 x i32> %v19, %v82
105  %v84 = and <16 x i32> %v83, %v20
106  %v85 = xor <16 x i32> %v19, %v84
107  %v86 = bitcast <16 x i32> %v85 to <16 x float>  ; reinterpret the integer result as float for the return
108  ret <16 x float> %v86
109}
110