; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

define void @knownbits_zext_in_reg(i8*) nounwind {
; X32-LABEL: knownbits_zext_in_reg:
; X32:       # %bb.0: # %BB
; X32-NEXT:    pushl %ebp
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzbl (%eax), %eax
; X32-NEXT:    imull $101, %eax, %eax
; X32-NEXT:    shrl $14, %eax
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    vmovd %eax, %xmm0
; X32-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X32-NEXT:    vpextrd $1, %xmm0, %ebp
; X32-NEXT:    xorl %ecx, %ecx
; X32-NEXT:    vmovd %xmm0, %esi
; X32-NEXT:    vpextrd $2, %xmm0, %edi
; X32-NEXT:    vpextrd $3, %xmm0, %ebx
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB0_1: # %CF
; X32-NEXT:    # =>This Loop Header: Depth=1
; X32-NEXT:    # Child Loop BB0_2 Depth 2
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    movl %ebp, %eax
; X32-NEXT:    divl %ebp
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    movl %esi, %eax
; X32-NEXT:    divl %esi
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    movl %edi, %eax
; X32-NEXT:    divl %edi
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    movl %ebx, %eax
; X32-NEXT:    divl %ebx
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB0_2: # %CF237
; X32-NEXT:    # Parent Loop BB0_1 Depth=1
; X32-NEXT:    # => This Inner Loop Header: Depth=2
; X32-NEXT:    testb %cl, %cl
; X32-NEXT:    jne .LBB0_2
; X32-NEXT:    jmp .LBB0_1
;
; X64-LABEL: knownbits_zext_in_reg:
; X64:       # %bb.0: # %BB
; X64-NEXT:    movzbl (%rdi), %eax
; X64-NEXT:    imull $101, %eax, %eax
; X64-NEXT:    shrl $14, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    vpextrd $1, %xmm0, %r8d
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    vmovd %xmm0, %r9d
; X64-NEXT:    vpextrd $2, %xmm0, %edi
; X64-NEXT:    vpextrd $3, %xmm0, %ecx
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB0_1: # %CF
; X64-NEXT:    # =>This Loop Header: Depth=1
; X64-NEXT:    # Child Loop BB0_2 Depth 2
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movl %r8d, %eax
; X64-NEXT:    divl %r8d
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movl %r9d, %eax
; X64-NEXT:    divl %r9d
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    divl %edi
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    divl %ecx
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB0_2: # %CF237
; X64-NEXT:    # Parent Loop BB0_1 Depth=1
; X64-NEXT:    # => This Inner Loop Header: Depth=2
; X64-NEXT:    testb %sil, %sil
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:    jmp .LBB0_1
BB:
  %L5 = load i8, i8* %0
  %Sl9 = select i1 true, i8 %L5, i8 undef
  %B21 = udiv i8 %Sl9, -93
  br label %CF

CF:                                               ; preds = %CF246, %BB
  %I40 = insertelement <4 x i8> zeroinitializer, i8 %B21, i32 1
  %B41 = srem <4 x i8> %I40, %I40
  br label %CF237

CF237:                                            ; preds = %CF237, %CF
  %Cmp73 = icmp ne i1 undef, undef
  br i1 %Cmp73, label %CF237, label %CF246

CF246:                                            ; preds = %CF237
  %Cmp117 = icmp ult <4 x i8> %B41, undef
  %E156 = extractelement <4 x i1> %Cmp117, i32 2
  br label %CF
}

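; Each operand below is masked to at most 15 significant bits, so the sum fits
; in 16 bits and the lshr by 17 has a known-zero result; both targets fold the
; whole function down to returning 0.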
define i32 @knownbits_mask_add_lshr(i32 %a0, i32 %a1) nounwind {
; X32-LABEL: knownbits_mask_add_lshr:
; X32:       # %bb.0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_add_lshr:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = and i32 %a0, 32767
  %2 = and i32 %a1, 32766
  %3 = add i32 %1, %2
  %4 = lshr i32 %3, 17
  ret i32 %4
}

define i128 @knownbits_mask_addc_shl(i64 %a0, i64 %a1, i64 %a2) nounwind {
; X32-LABEL: knownbits_mask_addc_shl:
; X32:       # %bb.0:
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl $-1024, %esi # imm = 0xFC00
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    andl %esi, %edi
; X32-NEXT:    andl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    addl %edi, %esi
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    adcl $0, %ecx
; X32-NEXT:    shldl $22, %edx, %ecx
; X32-NEXT:    shldl $22, %esi, %edx
; X32-NEXT:    movl %edx, 8(%eax)
; X32-NEXT:    movl %ecx, 12(%eax)
; X32-NEXT:    movl $0, 4(%eax)
; X32-NEXT:    movl $0, (%eax)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    retl $4
;
; X64-LABEL: knownbits_mask_addc_shl:
; X64:       # %bb.0:
; X64-NEXT:    andq $-1024, %rdi # imm = 0xFC00
; X64-NEXT:    andq $-1024, %rsi # imm = 0xFC00
; X64-NEXT:    addq %rdi, %rsi
; X64-NEXT:    adcl $0, %edx
; X64-NEXT:    shldq $54, %rsi, %rdx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = and i64 %a0, -1024
  %2 = zext i64 %1 to i128
  %3 = and i64 %a1, -1024
  %4 = zext i64 %3 to i128
  %5 = add i128 %2, %4
  %6 = zext i64 %a2 to i128
  %7 = shl i128 %6, 64
  %8 = add i128 %5, %7
  %9 = shl i128 %8, 54
  ret i128 %9
}

define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_uaddo_saddo:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl %ecx, %edx
; X32-NEXT:    addl %eax, %edx
; X32-NEXT:    setb %bl
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    setns %al
; X32-NEXT:    testl %ecx, %ecx
; X32-NEXT:    setns %cl
; X32-NEXT:    cmpb %al, %cl
; X32-NEXT:    sete %al
; X32-NEXT:    testl %edx, %edx
; X32-NEXT:    setns %dl
; X32-NEXT:    cmpb %dl, %cl
; X32-NEXT:    setne %dl
; X32-NEXT:    andb %al, %dl
; X32-NEXT:    orb %bl, %dl
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_uaddo_saddo:
; X64:       # %bb.0:
; X64-NEXT:    shlq $32, %rdi
; X64-NEXT:    shlq $32, %rsi
; X64-NEXT:    addq %rdi, %rsi
; X64-NEXT:    setb %al
; X64-NEXT:    seto %dl
; X64-NEXT:    orb %al, %dl
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = shl i64 %a0, 32
  %2 = shl i64 %a1, 32
  %u = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %1, i64 %2)
  %uval = extractvalue {i64, i1} %u, 0
  %uovf = extractvalue {i64, i1} %u, 1
  %s = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %1, i64 %2)
  %sval = extractvalue {i64, i1} %s, 0
  %sovf = extractvalue {i64, i1} %s, 1
  %sum = add i64 %uval, %sval
  %3 = trunc i64 %sum to i32
  %4 = or i1 %uovf, %sovf
  %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
  %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
  ret {i32, i1} %ret1
}

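; Both operands are shifted left by 32, so the low 32 bits of the usub/ssub
; results are known zero and the truncated i32 result is always 0; only the
; overflow flags need computing, which lets the X64 lowering use a compare in
; place of a real subtraction.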
define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_usubo_ssubo:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl %ecx, %edx
; X32-NEXT:    subl %eax, %edx
; X32-NEXT:    setb %bl
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    setns %al
; X32-NEXT:    testl %ecx, %ecx
; X32-NEXT:    setns %cl
; X32-NEXT:    cmpb %al, %cl
; X32-NEXT:    setne %al
; X32-NEXT:    testl %edx, %edx
; X32-NEXT:    setns %dl
; X32-NEXT:    cmpb %dl, %cl
; X32-NEXT:    setne %dl
; X32-NEXT:    andb %al, %dl
; X32-NEXT:    orb %bl, %dl
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_usubo_ssubo:
; X64:       # %bb.0:
; X64-NEXT:    shlq $32, %rdi
; X64-NEXT:    shlq $32, %rsi
; X64-NEXT:    cmpq %rsi, %rdi
; X64-NEXT:    setb %al
; X64-NEXT:    seto %dl
; X64-NEXT:    orb %al, %dl
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = shl i64 %a0, 32
  %2 = shl i64 %a1, 32
  %u = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %1, i64 %2)
  %uval = extractvalue {i64, i1} %u, 0
  %uovf = extractvalue {i64, i1} %u, 1
  %s = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %1, i64 %2)
  %sval = extractvalue {i64, i1} %s, 0
  %sovf = extractvalue {i64, i1} %s, 1
  %sum = add i64 %uval, %sval
  %3 = trunc i64 %sum to i32
  %4 = or i1 %uovf, %sovf
  %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
  %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
  ret {i32, i1} %ret1
}

declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone