; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
;
; Regression tests for known-bits (computeKnownBits) driven simplifications in
; the X86 backend: each function is constructed so the backend can prove part
; of the result constant, and the CHECK lines pin the expected codegen.
; The CHECK bodies are autogenerated (see NOTE above) -- regenerate them with
; update_llc_test_checks.py rather than editing them by hand.

; NOTE(review): the opaque value names (%Sl9, %B41, %CF237) suggest this was
; reduced from a fuzzer case -- unconfirmed. The two udivs of the same
; zero-extended byte are lowered to imull+shrl magic-number sequences, and the
; CF/CF237 loop nest cycles forever by construction (no exit edge is emitted),
; so only the loop-header codegen is being checked.
define void @knownbits_zext_in_reg(i8*) nounwind {
; X32-LABEL: knownbits_zext_in_reg:
; X32: # %bb.0: # %BB
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl (%eax), %ecx
; X32-NEXT: imull $101, %ecx, %eax
; X32-NEXT: shrl $14, %eax
; X32-NEXT: imull $177, %ecx, %edx
; X32-NEXT: shrl $14, %edx
; X32-NEXT: movzbl %al, %ecx
; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB0_1: # %CF
; X32-NEXT: # =>This Loop Header: Depth=1
; X32-NEXT: # Child Loop BB0_2 Depth 2
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: divb %dl
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB0_2: # %CF237
; X32-NEXT: # Parent Loop BB0_1 Depth=1
; X32-NEXT: # => This Inner Loop Header: Depth=2
; X32-NEXT: testb %bl, %bl
; X32-NEXT: jne .LBB0_2
; X32-NEXT: jmp .LBB0_1
;
; X64-LABEL: knownbits_zext_in_reg:
; X64: # %bb.0: # %BB
; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: imull $101, %eax, %ecx
; X64-NEXT: shrl $14, %ecx
; X64-NEXT: imull $177, %eax, %edx
; X64-NEXT: shrl $14, %edx
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_1: # %CF
; X64-NEXT: # =>This Loop Header: Depth=1
; X64-NEXT: # Child Loop BB0_2 Depth 2
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: divb %dl
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_2: # %CF237
; X64-NEXT: # Parent Loop BB0_1 Depth=1
; X64-NEXT: # => This Inner Loop Header: Depth=2
; X64-NEXT: testb %sil, %sil
; X64-NEXT: jne .LBB0_2
; X64-NEXT: jmp .LBB0_1
BB:
  %L5 = load i8, i8* %0
  %Sl9 = select i1 true, i8 %L5, i8 undef
  %B21 = udiv i8 %Sl9, -93
  %B22 = udiv i8 %Sl9, 93
  br label %CF

CF: ; preds = %CF246, %BB
  %I40 = insertelement <4 x i8> zeroinitializer, i8 %B21, i32 1
  %I41 = insertelement <4 x i8> zeroinitializer, i8 %B22, i32 1
  %B41 = srem <4 x i8> %I40, %I41
  br label %CF237

CF237: ; preds = %CF237, %CF
  %Cmp73 = icmp ne i1 undef, undef
  br i1 %Cmp73, label %CF237, label %CF246

CF246: ; preds = %CF237
  %Cmp117 = icmp ult <4 x i8> %B41, undef
  %E156 = extractelement <4 x i1> %Cmp117, i32 2
  br label %CF
}

; %1 < 2^15 and %2 < 2^15, so their sum is known to fit in 16 bits; the lshr
; by 17 is therefore known zero and both targets fold the whole body to 0.
define i32 @knownbits_mask_add_lshr(i32 %a0, i32 %a1) nounwind {
; X32-LABEL: knownbits_mask_add_lshr:
; X32: # %bb.0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_add_lshr:
; X64: # %bb.0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
  %1 = and i32 %a0, 32767
  %2 = and i32 %a1, 32766
  %3 = add i32 %1, %2
  %4 = lshr i32 %3, 17
  ret i32 %4
}

; Both i64 addends have their low 10 bits cleared (and -1024), so after the
; i128 shl by 54 the low 64 bits of the result are known zero: X64 produces
; them with a plain xor and computes only the high half via shld.
define i128 @knownbits_mask_addc_shl(i64 %a0, i64 %a1, i64 %a2) nounwind {
; X32-LABEL: knownbits_mask_addc_shl:
; X32: # %bb.0:
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl $-1024, %esi # imm = 0xFC00
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: andl %esi, %edi
; X32-NEXT: andl {{[0-9]+}}(%esp), %esi
; X32-NEXT: addl %edi, %esi
; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: shldl $22, %edx, %ecx
; X32-NEXT: shldl $22, %esi, %edx
; X32-NEXT: movl %edx, 8(%eax)
; X32-NEXT: movl %ecx, 12(%eax)
; X32-NEXT: movl $0, 4(%eax)
; X32-NEXT: movl $0, (%eax)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: retl $4
;
; X64-LABEL: knownbits_mask_addc_shl:
; X64: # %bb.0:
; X64-NEXT: andq $-1024, %rdi # imm = 0xFC00
; X64-NEXT: andq $-1024, %rsi # imm = 0xFC00
; X64-NEXT: addq %rdi, %rsi
; X64-NEXT: adcq $0, %rdx
; X64-NEXT: shldq $54, %rsi, %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
  %1 = and i64 %a0, -1024
  %2 = zext i64 %1 to i128
  %3 = and i64 %a1, -1024
  %4 = zext i64 %3 to i128
  %5 = add i128 %2, %4
  %6 = zext i64 %a2 to i128
  %7 = shl i128 %6, 64
  %8 = add i128 %5, %7
  %9 = shl i128 %8, 54
  ret i128 %9
}

; Both operands are shifted left by 32, so the low 32 bits of each i64 sum
; are known zero and the truncated i32 result folds to 0; only the overflow
; flags (setb for unsigned, seto for signed) are actually computed, and a
; single add provides both.
define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_uaddo_saddo:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: setb %al
; X32-NEXT: seto %dl
; X32-NEXT: orb %al, %dl
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_uaddo_saddo:
; X64: # %bb.0:
; X64-NEXT: shlq $32, %rdi
; X64-NEXT: shlq $32, %rsi
; X64-NEXT: addq %rdi, %rsi
; X64-NEXT: setb %al
; X64-NEXT: seto %dl
; X64-NEXT: orb %al, %dl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
  %1 = shl i64 %a0, 32
  %2 = shl i64 %a1, 32
  %u = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %1, i64 %2)
  %uval = extractvalue {i64, i1} %u, 0
  %uovf = extractvalue {i64, i1} %u, 1
  %s = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %1, i64 %2)
  %sval = extractvalue {i64, i1} %s, 0
  %sovf = extractvalue {i64, i1} %s, 1
  %sum = add i64 %uval, %sval
  %3 = trunc i64 %sum to i32
  %4 = or i1 %uovf, %sovf
  %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
  %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
  ret {i32, i1} %ret1
}

; Subtraction twin of the test above: the i32 value is known zero, and a
; single cmp yields both the unsigned (setb) and signed (seto) overflow
; flags without materializing the difference.
define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_usubo_ssubo:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X32-NEXT: setb %al
; X32-NEXT: seto %dl
; X32-NEXT: orb %al, %dl
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_usubo_ssubo:
; X64: # %bb.0:
; X64-NEXT: shlq $32, %rdi
; X64-NEXT: shlq $32, %rsi
; X64-NEXT: cmpq %rsi, %rdi
; X64-NEXT: setb %al
; X64-NEXT: seto %dl
; X64-NEXT: orb %al, %dl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
  %1 = shl i64 %a0, 32
  %2 = shl i64 %a1, 32
  %u = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %1, i64 %2)
  %uval = extractvalue {i64, i1} %u, 0
  %uovf = extractvalue {i64, i1} %u, 1
  %s = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %1, i64 %2)
  %sval = extractvalue {i64, i1} %s, 0
  %sovf = extractvalue {i64, i1} %s, 1
  %sum = add i64 %uval, %sval
  %3 = trunc i64 %sum to i32
  %4 = or i1 %uovf, %sovf
  %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
  %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
  ret {i32, i1} %ret1
}

; Overflow-intrinsic declarations used by the two tests above.
declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone

; fshl(%a0, -1, 5): the low bits of the funnel-shift result come entirely
; from the all-ones second operand, so the result masked with 3 is the
; constant 3 on both targets.
define i32 @knownbits_fshl(i32 %a0) nounwind {
; X32-LABEL: knownbits_fshl:
; X32: # %bb.0:
; X32-NEXT: movl $3, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_fshl:
; X64: # %bb.0:
; X64-NEXT: movl $3, %eax
; X64-NEXT: retq
  %1 = tail call i32 @llvm.fshl.i32(i32 %a0, i32 -1, i32 5)
  %2 = and i32 %1, 3
  ret i32 %2
}

; Same known-bits fold for the right funnel shift: fshr(%a0, -1, 5) also has
; all-ones low bits, so the masked result folds to the constant 3.
define i32 @knownbits_fshr(i32 %a0) nounwind {
; X32-LABEL: knownbits_fshr:
; X32: # %bb.0:
; X32-NEXT: movl $3, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_fshr:
; X64: # %bb.0:
; X64-NEXT: movl $3, %eax
; X64-NEXT: retq
  %1 = tail call i32 @llvm.fshr.i32(i32 %a0, i32 -1, i32 5)
  %2 = and i32 %1, 3
  ret i32 %2
}

; Funnel-shift intrinsic declarations used by the two tests above.
declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
declare i32 @llvm.fshr.i32(i32, i32, i32) nounwind readnone