; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=CMOV
; RUN: llc < %s -asm-verbose=false -mtriple=i686-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=NOCMOV

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; Test 2xCMOV patterns exposed after legalization.
; One way to do that is with (select (fcmp une/oeq)), which gets
; legalized to setp/setne.
;
; The x86_64 run is verified with the cmov-based expectations and the i686
; run with the branch-based expectations; label checks are shared by both.

; i32 select on an ordered-equal float compare. The x86_64 expectations are a
; cmovne/cmovp pair; the i686 expectations are a jne/jp pair that picks which
; stack slot to load the result from.

; CHECK-LABEL: test_select_fcmp_oeq_i32:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovnel %esi, %edi
; CMOV-NEXT: cmovpl %esi, %edi
; CMOV-NEXT: movl %edi, %eax
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 16(%esp), %eax
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 12(%esp), %eax
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%eax), %eax
; NOCMOV-NEXT: retl
define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, i32 %c, i32 %d
  ret i32 %r
}

; Same pattern with an i64 result. On i686 the selected slot is loaded as two
; 32-bit halves into %eax and %edx.

; CHECK-LABEL: test_select_fcmp_oeq_i64:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovneq %rsi, %rdi
; CMOV-NEXT: cmovpq %rsi, %rdi
; CMOV-NEXT: movq %rdi, %rax
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 20(%esp), %ecx
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 12(%esp), %ecx
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%ecx), %eax
; NOCMOV-NEXT: orl $4, %ecx
; NOCMOV-NEXT: movl (%ecx), %edx
; NOCMOV-NEXT: retl
define i64 @test_select_fcmp_oeq_i64(float %a, float %b, i64 %c, i64 %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, i64 %c, i64 %d
  ret i64 %r
}

; The unordered-or-not-equal variant: same instruction shape as the oeq case
; with the roles of the two select operands swapped.

; CHECK-LABEL: test_select_fcmp_une_i64:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovneq %rdi, %rsi
; CMOV-NEXT: cmovpq %rdi, %rsi
; CMOV-NEXT: movq %rsi, %rax
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 12(%esp), %ecx
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 20(%esp), %ecx
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%ecx), %eax
; NOCMOV-NEXT: orl $4, %ecx
; NOCMOV-NEXT: movl (%ecx), %edx
; NOCMOV-NEXT: retl
define i64 @test_select_fcmp_une_i64(float %a, float %b, i64 %c, i64 %d) #0 {
entry:
  %cmp = fcmp une float %a, %b
  %r = select i1 %cmp, i64 %c, i64 %d
  ret i64 %r
}

; Selecting a double: the x86_64 expectations are a single jne/jp pair around
; one register move rather than conditional moves.

; CHECK-LABEL: test_select_fcmp_oeq_f64:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; CMOV-NEXT: jp [[TBB]]
; CMOV-NEXT: movaps %xmm2, %xmm3
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 20(%esp), %eax
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 12(%esp), %eax
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: fldl (%eax)
; NOCMOV-NEXT: retl
define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, double %c, double %d
  ret double %r
}

; Selecting a <4 x i32>: one branch pair on x86_64; on i686 the expectations
; contain one jne/jp pair per 32-bit element, and the result is stored through
; the pointer loaded from 12(%esp).

; CHECK-LABEL: test_select_fcmp_oeq_v4i32:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; CMOV-NEXT: jp [[TBB]]
; CMOV-NEXT: movaps %xmm2, %xmm3
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq

; NOCMOV-NEXT: pushl %edi
; NOCMOV-NEXT: pushl %esi
; NOCMOV-NEXT: flds 20(%esp)
; NOCMOV-NEXT: flds 16(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 40(%esp), %eax
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 24(%esp), %eax
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%eax), %ecx
; NOCMOV-NEXT: leal 44(%esp), %edx
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 28(%esp), %edx
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl 12(%esp), %eax
; NOCMOV-NEXT: movl (%edx), %edx
; NOCMOV-NEXT: leal 48(%esp), %esi
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 32(%esp), %esi
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%esi), %esi
; NOCMOV-NEXT: leal 52(%esp), %edi
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 36(%esp), %edi
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%edi), %edi
; NOCMOV-NEXT: movl %edi, 12(%eax)
; NOCMOV-NEXT: movl %esi, 8(%eax)
; NOCMOV-NEXT: movl %edx, 4(%eax)
; NOCMOV-NEXT: movl %ecx, (%eax)
; NOCMOV-NEXT: popl %esi
; NOCMOV-NEXT: popl %edi
; NOCMOV-NEXT: retl $4
define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
  ret <4 x i32> %r
}

; Also make sure we catch the original code-sequence of interest:
; (1065353216 is 0x3F800000, the bit pattern of 1.0f in the constant pool.)

; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
; CMOV-NEXT: .long 1065353216

; CHECK-LABEL: test_zext_fcmp_une:
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; CMOV-NEXT: jp [[TBB]]
; CMOV-NEXT: xorps %xmm0, %xmm0
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: retq

; NOCMOV: jne
; NOCMOV-NEXT: jp
define float @test_zext_fcmp_une(float %a, float %b) #0 {
entry:
  %cmp = fcmp une float %a, %b
  %conv1 = zext i1 %cmp to i32
  %conv2 = sitofp i32 %conv1 to float
  ret float %conv2
}

; The oeq counterpart: same sequence with the 0.0 and 1.0 materializations
; swapped.

; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
; CMOV-NEXT: .long 1065353216

; CHECK-LABEL: test_zext_fcmp_oeq:
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: xorps %xmm0, %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; CMOV-NEXT: jp [[TBB]]
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: retq

; NOCMOV: jne
; NOCMOV-NEXT: jp
define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %conv1 = zext i1 %cmp to i32
  %conv2 = sitofp i32 %conv1 to float
  ret float %conv2
}

attributes #0 = { nounwind }

@g8 = global i8 0

; The following test failed because llvm had a bug where a structure like:
;
; %vreg12<def> = CMOV_GR8 %vreg7, %vreg11 ... (lt)
; %vreg13<def> = CMOV_GR8 %vreg12, %vreg11 ... (gt)
;
; was lowered as follows (the two cmovs were expanded together):
;
; BB#0:
;   JL_1 BB#9
; BB#7:
;   JG_1 BB#9
; BB#8:
; BB#9:
;   vreg12 = phi(vreg7, BB#8, vreg11, BB#0, vreg12, BB#7)
;   vreg13 = COPY vreg12
;
; which was invalid, as %vreg12 is not the same value as %vreg13.

; CHECK-LABEL: no_cascade_opt:
; CMOV-DAG: cmpl %edx, %esi
; CMOV-DAG: movb $20, %al
; CMOV-DAG: movb $20, %dl
; CMOV: jl [[BB0:.LBB[0-9_]+]]
; CMOV: movl %ecx, %edx
; CMOV: [[BB0]]:
; CMOV: jg [[BB1:.LBB[0-9_]+]]
; CMOV: movl %edx, %eax
; CMOV: [[BB1]]:
; CMOV: testl %edi, %edi
; CMOV: je [[BB2:.LBB[0-9_]+]]
; CMOV: movl %edx, %eax
; CMOV: [[BB2]]:
; CMOV: movb %al, g8(%rip)
; CMOV: retq
define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) {
entry:
  %c0 = icmp eq i32 %v0, 0
  %c1 = icmp slt i32 %v1, %v2
  %c2 = icmp sgt i32 %v1, %v2
  %trunc = trunc i32 %v3 to i8
  %sel0 = select i1 %c1, i8 20, i8 %trunc
  %sel1 = select i1 %c2, i8 20, i8 %sel0
  %sel2 = select i1 %c0, i8 %sel1, i8 %sel0
  store volatile i8 %sel2, i8* @g8
  ret void
}