; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=CMOV
; RUN: llc < %s -asm-verbose=false -mtriple=i686-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=NOCMOV

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; Test 2xCMOV patterns exposed after legalization.
; One way to do that is with (select (fcmp une/oeq)), which gets
; legalized to setp/setne.
;
; Each test below is checked twice: once for the x86_64 run (CMOV prefix) and
; once for the i686 run (NOCMOV prefix).

; i32 select on (fcmp oeq). The x86_64 run expects a cmovne/cmovp pair; the
; i686 run expects a jne/jp branch pair that picks which stack slot to load.

; CHECK-LABEL: test_select_fcmp_oeq_i32:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovnel %esi, %edi
; CMOV-NEXT: cmovpl %esi, %edi
; CMOV-NEXT: movl %edi, %eax
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 16(%esp), %eax
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 12(%esp), %eax
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%eax), %eax
; NOCMOV-NEXT: retl
define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, i32 %c, i32 %d
  ret i32 %r
}

; i64 select on (fcmp oeq). Same shape as the i32 case, with 64-bit cmovs on
; x86_64 and a two-word load through the selected address on i686.

; CHECK-LABEL: test_select_fcmp_oeq_i64:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovneq %rsi, %rdi
; CMOV-NEXT: cmovpq %rsi, %rdi
; CMOV-NEXT: movq %rdi, %rax
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 20(%esp), %ecx
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 12(%esp), %ecx
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%ecx), %eax
; NOCMOV-NEXT: movl 4(%ecx), %edx
; NOCMOV-NEXT: retl
define i64 @test_select_fcmp_oeq_i64(float %a, float %b, i64 %c, i64 %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, i64 %c, i64 %d
  ret i64 %r
}

; i64 select on (fcmp une). Compared with the oeq case, the expected output
; has the roles of the two selected operands swapped.

; CHECK-LABEL: test_select_fcmp_une_i64:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovneq %rdi, %rsi
; CMOV-NEXT: cmovpq %rdi, %rsi
; CMOV-NEXT: movq %rsi, %rax
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 12(%esp), %ecx
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 20(%esp), %ecx
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%ecx), %eax
; NOCMOV-NEXT: movl 4(%ecx), %edx
; NOCMOV-NEXT: retl
define i64 @test_select_fcmp_une_i64(float %a, float %b, i64 %c, i64 %d) #0 {
entry:
  %cmp = fcmp une float %a, %b
  %r = select i1 %cmp, i64 %c, i64 %d
  ret i64 %r
}

; double select on (fcmp oeq). Both runs expect a jne/jp branch pair here:
; around a movaps on x86_64, and around an address selection on i686.

; CHECK-LABEL: test_select_fcmp_oeq_f64:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; CMOV-NEXT: jp [[TBB]]
; CMOV-NEXT: movaps %xmm2, %xmm3
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 20(%esp), %eax
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 12(%esp), %eax
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: fldl (%eax)
; NOCMOV-NEXT: retl
define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, double %c, double %d
  ret double %r
}

; <4 x i32> select on (fcmp oeq). The x86_64 run expects the same branch +
; movaps sequence as the double case; the i686 run expects one jne/jp pair per
; element, with the four selected values stored through the pointer loaded
; from 12(%esp).

; CHECK-LABEL: test_select_fcmp_oeq_v4i32:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; CMOV-NEXT: jp [[TBB]]
; CMOV-NEXT: movaps %xmm2, %xmm3
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq

; NOCMOV-NEXT: pushl %edi
; NOCMOV-NEXT: pushl %esi
; NOCMOV-NEXT: flds 20(%esp)
; NOCMOV-NEXT: flds 16(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 40(%esp), %eax
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 24(%esp), %eax
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%eax), %ecx
; NOCMOV-NEXT: leal 44(%esp), %edx
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 28(%esp), %edx
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl 12(%esp), %eax
; NOCMOV-NEXT: movl (%edx), %edx
; NOCMOV-NEXT: leal 48(%esp), %esi
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 32(%esp), %esi
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%esi), %esi
; NOCMOV-NEXT: leal 52(%esp), %edi
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 36(%esp), %edi
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%edi), %edi
; NOCMOV-NEXT: movl %edi, 12(%eax)
; NOCMOV-NEXT: movl %esi, 8(%eax)
; NOCMOV-NEXT: movl %edx, 4(%eax)
; NOCMOV-NEXT: movl %ecx, (%eax)
; NOCMOV-NEXT: popl %esi
; NOCMOV-NEXT: popl %edi
; NOCMOV-NEXT: retl $4
define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
  ret <4 x i32> %r
}

; Also make sure we catch the original code-sequence of interest:
; (zext (fcmp une/oeq)) converted to float. The x86_64 run expects a choice
; between a constant-pool value (1065353216 = 0x3F800000, the bit pattern of
; 1.0f) and zero; the i686 run only checks for the jne/jp pair.

; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
; CMOV-NEXT: .long 1065353216

; CHECK-LABEL: test_zext_fcmp_une:
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; CMOV-NEXT: jp [[TBB]]
; CMOV-NEXT: xorps %xmm0, %xmm0
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: retq

; NOCMOV: jne
; NOCMOV-NEXT: jp
define float @test_zext_fcmp_une(float %a, float %b) #0 {
entry:
  %cmp = fcmp une float %a, %b
  %conv1 = zext i1 %cmp to i32
  %conv2 = sitofp i32 %conv1 to float
  ret float %conv2
}

; Same as above for oeq: the expected x86_64 output starts from zero and
; conditionally loads the constant-pool value instead.

; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
; CMOV-NEXT: .long 1065353216

; CHECK-LABEL: test_zext_fcmp_oeq:
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: xorps %xmm0, %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; CMOV-NEXT: jp [[TBB]]
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: retq

; NOCMOV: jne
; NOCMOV-NEXT: jp
define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %conv1 = zext i1 %cmp to i32
  %conv2 = sitofp i32 %conv1 to float
  ret float %conv2
}

attributes #0 = { nounwind }

; Destination of the volatile store in no_cascade_opt.
@g8 = global i8 0

; The following test used to fail because LLVM had a bug where a structure
; like:
;
;   %12 = CMOV_GR8 %7, %11 ... (lt)
;   %13 = CMOV_GR8 %12, %11 ... (gt)
;
; was lowered incorrectly. The first two cmovs got expanded to:
;
;   %bb.0:
;     JL_1 %bb.9
;   %bb.7:
;     JG_1 %bb.9
;   %bb.8:
;   %bb.9:
;     %12 = phi(%7, %bb.8, %11, %bb.0, %12, %bb.7)
;     %13 = COPY %12
;
; which was invalid, as %12 is not the same value as %13.
;
; Only the x86_64 run has instruction checks for this function.

; CHECK-LABEL: no_cascade_opt:
; CMOV-DAG: cmpl %edx, %esi
; CMOV-DAG: movb $20, %al
; CMOV-DAG: movb $20, %dl
; CMOV: jge [[BB2:.LBB[0-9_]+]]
; CMOV: jle [[BB3:.LBB[0-9_]+]]
; CMOV: [[BB0:.LBB[0-9_]+]]
; CMOV: testl %edi, %edi
; CMOV: jne [[BB4:.LBB[0-9_]+]]
; CMOV: [[BB1:.LBB[0-9_]+]]
; CMOV: movb %al, g8(%rip)
; CMOV: retq
; CMOV: [[BB2]]:
; CMOV: movl %ecx, %edx
; CMOV: jg [[BB0]]
; CMOV: [[BB3]]:
; CMOV: movl %edx, %eax
; CMOV: testl %edi, %edi
; CMOV: je [[BB1]]
; CMOV: [[BB4]]:
; CMOV: movl %edx, %eax
; CMOV: movb %al, g8(%rip)
; CMOV: retq
define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) {
entry:
  %c0 = icmp eq i32 %v0, 0
  %c1 = icmp slt i32 %v1, %v2
  %c2 = icmp sgt i32 %v1, %v2
  %trunc = trunc i32 %v3 to i8
  ; %sel0 and %sel1 form the cascade; %sel2 uses both results, so the two
  ; values must stay distinct.
  %sel0 = select i1 %c1, i8 20, i8 %trunc
  %sel1 = select i1 %c2, i8 20, i8 %sel0
  %sel2 = select i1 %c0, i8 %sel1, i8 %sel0
  store volatile i8 %sel2, i8* @g8
  ret void
}