; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Check that a division is bypassed only when appropriate.
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=atom < %s | FileCheck -check-prefixes=CHECK,ATOM %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64 < %s | FileCheck -check-prefixes=CHECK,REST,X64 %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont < %s | FileCheck -check-prefixes=CHECK,REST,SLM %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck -check-prefixes=CHECK,REST,SKL %s
; RUN: llc -profile-summary-huge-working-set-size-threshold=1 -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck -check-prefixes=HUGEWS %s

; Verify that div32 is bypassed only for Atoms.
define i32 @div32(i32 %a, i32 %b) {
; ATOM-LABEL: div32:
; ATOM:       # %bb.0: # %entry
; ATOM-NEXT:    movl %edi, %eax
; ATOM-NEXT:    orl %esi, %eax
; ATOM-NEXT:    testl $-256, %eax
; ATOM-NEXT:    je .LBB0_1
; ATOM-NEXT:  # %bb.2:
; ATOM-NEXT:    movl %edi, %eax
; ATOM-NEXT:    cltd
; ATOM-NEXT:    idivl %esi
; ATOM-NEXT:    retq
; ATOM-NEXT:  .LBB0_1:
; ATOM-NEXT:    movzbl %dil, %eax
; ATOM-NEXT:    divb %sil
; ATOM-NEXT:    movzbl %al, %eax
; ATOM-NEXT:    retq
;
; REST-LABEL: div32:
; REST:       # %bb.0: # %entry
; REST-NEXT:    movl %edi, %eax
; REST-NEXT:    cltd
; REST-NEXT:    idivl %esi
; REST-NEXT:    retq
;
; HUGEWS-LABEL: div32:
; HUGEWS:       # %bb.0: # %entry
; HUGEWS-NEXT:    movl %edi, %eax
; HUGEWS-NEXT:    cltd
; HUGEWS-NEXT:    idivl %esi
; HUGEWS-NEXT:    retq
entry:
  %div = sdiv i32 %a, %b
  ret i32 %div
}

; Verify that div64 is bypassed on every CPU tested, except in the HUGEWS run
; (huge working set size threshold forced to 1), where no bypass is expected.
define i64 @div64(i64 %a, i64 %b) {
; ATOM-LABEL: div64:
; ATOM:       # %bb.0: # %entry
; ATOM-NEXT:    movq %rdi, %rcx
; ATOM-NEXT:    movq %rdi, %rax
; ATOM-NEXT:    orq %rsi, %rcx
; ATOM-NEXT:    shrq $32, %rcx
; ATOM-NEXT:    je .LBB1_1
; ATOM-NEXT:  # %bb.2:
; ATOM-NEXT:    cqto
; ATOM-NEXT:    idivq %rsi
; ATOM-NEXT:    retq
; ATOM-NEXT:  .LBB1_1:
; ATOM-NEXT:    # kill: def $eax killed $eax killed $rax
; ATOM-NEXT:    xorl %edx, %edx
; ATOM-NEXT:    divl %esi
; ATOM-NEXT:    # kill: def $eax killed $eax def $rax
; ATOM-NEXT:    retq
;
; X64-LABEL: div64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq %rdi, %rcx
; X64-NEXT:    orq %rsi, %rcx
; X64-NEXT:    shrq $32, %rcx
; X64-NEXT:    je .LBB1_1
; X64-NEXT:  # %bb.2:
; X64-NEXT:    cqto
; X64-NEXT:    idivq %rsi
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_1:
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    divl %esi
; X64-NEXT:    # kill: def $eax killed $eax def $rax
; X64-NEXT:    retq
;
; SLM-LABEL: div64:
; SLM:       # %bb.0: # %entry
; SLM-NEXT:    movq %rdi, %rcx
; SLM-NEXT:    movq %rdi, %rax
; SLM-NEXT:    orq %rsi, %rcx
; SLM-NEXT:    shrq $32, %rcx
; SLM-NEXT:    je .LBB1_1
; SLM-NEXT:  # %bb.2:
; SLM-NEXT:    cqto
; SLM-NEXT:    idivq %rsi
; SLM-NEXT:    retq
; SLM-NEXT:  .LBB1_1:
; SLM-NEXT:    xorl %edx, %edx
; SLM-NEXT:    # kill: def $eax killed $eax killed $rax
; SLM-NEXT:    divl %esi
; SLM-NEXT:    # kill: def $eax killed $eax def $rax
; SLM-NEXT:    retq
;
; SKL-LABEL: div64:
; SKL:       # %bb.0: # %entry
; SKL-NEXT:    movq %rdi, %rax
; SKL-NEXT:    movq %rdi, %rcx
; SKL-NEXT:    orq %rsi, %rcx
; SKL-NEXT:    shrq $32, %rcx
; SKL-NEXT:    je .LBB1_1
; SKL-NEXT:  # %bb.2:
; SKL-NEXT:    cqto
; SKL-NEXT:    idivq %rsi
; SKL-NEXT:    retq
; SKL-NEXT:  .LBB1_1:
; SKL-NEXT:    # kill: def $eax killed $eax killed $rax
; SKL-NEXT:    xorl %edx, %edx
; SKL-NEXT:    divl %esi
; SKL-NEXT:    # kill: def $eax killed $eax def $rax
; SKL-NEXT:    retq
;
; HUGEWS-LABEL: div64:
; HUGEWS:       # %bb.0: # %entry
; HUGEWS-NEXT:    movq %rdi, %rax
; HUGEWS-NEXT:    cqto
; HUGEWS-NEXT:    idivq %rsi
; HUGEWS-NEXT:    retq
entry:
  %div = sdiv i64 %a, %b
  ret i64 %div
}

; Verify that no extra code is generated when optimizing for size.
define i64 @div64_optsize(i64 %a, i64 %b) optsize {
; CHECK-LABEL: div64_optsize:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    cqto
; CHECK-NEXT:    idivq %rsi
; CHECK-NEXT:    retq
;
; HUGEWS-LABEL: div64_optsize:
; HUGEWS:       # %bb.0:
; HUGEWS-NEXT:    movq %rdi, %rax
; HUGEWS-NEXT:    cqto
; HUGEWS-NEXT:    idivq %rsi
; HUGEWS-NEXT:    retq
  %div = sdiv i64 %a, %b
  ret i64 %div
}

; Verify that no bypass code is generated for a function whose profile entry
; count is zero (!prof !15) under the module-level ProfileSummary.
define i64 @div64_pgso(i64 %a, i64 %b) !prof !15 {
; CHECK-LABEL: div64_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    cqto
; CHECK-NEXT:    idivq %rsi
; CHECK-NEXT:    retq
;
; HUGEWS-LABEL: div64_pgso:
; HUGEWS:       # %bb.0:
; HUGEWS-NEXT:    movq %rdi, %rax
; HUGEWS-NEXT:    cqto
; HUGEWS-NEXT:    idivq %rsi
; HUGEWS-NEXT:    retq
  %div = sdiv i64 %a, %b
  ret i64 %div
}

; Verify that a plain 64-bit division with no size attributes and no !prof
; annotation is bypassed on every CPU tested, but not in the HUGEWS run, which
; forces the huge working set size threshold to 1.
define i64 @div64_hugews(i64 %a, i64 %b) {
; ATOM-LABEL: div64_hugews:
; ATOM:       # %bb.0:
; ATOM-NEXT:    movq %rdi, %rcx
; ATOM-NEXT:    movq %rdi, %rax
; ATOM-NEXT:    orq %rsi, %rcx
; ATOM-NEXT:    shrq $32, %rcx
; ATOM-NEXT:    je .LBB4_1
; ATOM-NEXT:  # %bb.2:
; ATOM-NEXT:    cqto
; ATOM-NEXT:    idivq %rsi
; ATOM-NEXT:    retq
; ATOM-NEXT:  .LBB4_1:
; ATOM-NEXT:    # kill: def $eax killed $eax killed $rax
; ATOM-NEXT:    xorl %edx, %edx
; ATOM-NEXT:    divl %esi
; ATOM-NEXT:    # kill: def $eax killed $eax def $rax
; ATOM-NEXT:    retq
;
; X64-LABEL: div64_hugews:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq %rdi, %rcx
; X64-NEXT:    orq %rsi, %rcx
; X64-NEXT:    shrq $32, %rcx
; X64-NEXT:    je .LBB4_1
; X64-NEXT:  # %bb.2:
; X64-NEXT:    cqto
; X64-NEXT:    idivq %rsi
; X64-NEXT:    retq
; X64-NEXT:  .LBB4_1:
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    divl %esi
; X64-NEXT:    # kill: def $eax killed $eax def $rax
; X64-NEXT:    retq
;
; SLM-LABEL: div64_hugews:
; SLM:       # %bb.0:
; SLM-NEXT:    movq %rdi, %rcx
; SLM-NEXT:    movq %rdi, %rax
; SLM-NEXT:    orq %rsi, %rcx
; SLM-NEXT:    shrq $32, %rcx
; SLM-NEXT:    je .LBB4_1
; SLM-NEXT:  # %bb.2:
; SLM-NEXT:    cqto
; SLM-NEXT:    idivq %rsi
; SLM-NEXT:    retq
; SLM-NEXT:  .LBB4_1:
; SLM-NEXT:    xorl %edx, %edx
; SLM-NEXT:    # kill: def $eax killed $eax killed $rax
; SLM-NEXT:    divl %esi
; SLM-NEXT:    # kill: def $eax killed $eax def $rax
; SLM-NEXT:    retq
;
; SKL-LABEL: div64_hugews:
; SKL:       # %bb.0:
; SKL-NEXT:    movq %rdi, %rax
; SKL-NEXT:    movq %rdi, %rcx
; SKL-NEXT:    orq %rsi, %rcx
; SKL-NEXT:    shrq $32, %rcx
; SKL-NEXT:    je .LBB4_1
; SKL-NEXT:  # %bb.2:
; SKL-NEXT:    cqto
; SKL-NEXT:    idivq %rsi
; SKL-NEXT:    retq
; SKL-NEXT:  .LBB4_1:
; SKL-NEXT:    # kill: def $eax killed $eax killed $rax
; SKL-NEXT:    xorl %edx, %edx
; SKL-NEXT:    divl %esi
; SKL-NEXT:    # kill: def $eax killed $eax def $rax
; SKL-NEXT:    retq
;
; HUGEWS-LABEL: div64_hugews:
; HUGEWS:       # %bb.0:
; HUGEWS-NEXT:    movq %rdi, %rax
; HUGEWS-NEXT:    cqto
; HUGEWS-NEXT:    idivq %rsi
; HUGEWS-NEXT:    retq
  %div = sdiv i64 %a, %b
  ret i64 %div
}

; Verify that a 32-bit division is not bypassed on any tested CPU (Atom
; included) when the function is marked optsize.
define i32 @div32_optsize(i32 %a, i32 %b) optsize {
; CHECK-LABEL: div32_optsize:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %esi
; CHECK-NEXT:    retq
;
; HUGEWS-LABEL: div32_optsize:
; HUGEWS:       # %bb.0:
; HUGEWS-NEXT:    movl %edi, %eax
; HUGEWS-NEXT:    cltd
; HUGEWS-NEXT:    idivl %esi
; HUGEWS-NEXT:    retq
  %div = sdiv i32 %a, %b
  ret i32 %div
}

; Verify that a 32-bit division is not bypassed on any tested CPU (Atom
; included) when the function has a profile entry count of zero (!prof !15).
define i32 @div32_pgso(i32 %a, i32 %b) !prof !15 {
; CHECK-LABEL: div32_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %esi
; CHECK-NEXT:    retq
;
; HUGEWS-LABEL: div32_pgso:
; HUGEWS:       # %bb.0:
; HUGEWS-NEXT:    movl %edi, %eax
; HUGEWS-NEXT:    cltd
; HUGEWS-NEXT:    idivl %esi
; HUGEWS-NEXT:    retq
  %div = sdiv i32 %a, %b
  ret i32 %div
}

; Verify that a 32-bit division is not bypassed on any tested CPU (Atom
; included) when the function is marked minsize.
define i32 @div32_minsize(i32 %a, i32 %b) minsize {
; CHECK-LABEL: div32_minsize:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %esi
; CHECK-NEXT:    retq
;
; HUGEWS-LABEL: div32_minsize:
; HUGEWS:       # %bb.0:
; HUGEWS-NEXT:    movl %edi, %eax
; HUGEWS-NEXT:    cltd
; HUGEWS-NEXT:    idivl %esi
; HUGEWS-NEXT:    retq
  %div = sdiv i32 %a, %b
  ret i32 %div
}

; Module-level profile summary consumed by the *_pgso tests and by the HUGEWS
; run; !15 is the zero entry count attached to the *_pgso functions.
!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 1000, i32 1}
!13 = !{i32 999000, i64 1000, i32 3}
!14 = !{i32 999999, i64 5, i32 3}
!15 = !{!"function_entry_count", i64 0}