; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK,SSE41

define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_eq_1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    notl %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_eq_1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE41-NEXT:    pextrd $1, %xmm1, %eax
; SSE41-NEXT:    notl %eax
; SSE41-NEXT:    retq
  %cmp = icmp slt <4 x i32> %A, %B
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp eq <4 x i32> %sext, zeroinitializer
  %t0 = extractelement <4 x i1> %cmp1, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}

define i32 @test_ne_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_ne_1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_ne_1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE41-NEXT:    pextrd $1, %xmm1, %eax
; SSE41-NEXT:    retq
  %cmp = icmp slt <4 x i32> %A, %B
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp ne <4 x i32> %sext, zeroinitializer
  %t0 = extractelement <4 x i1> %cmp1, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}

define i32 @test_le_1(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_le_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl $-1, %eax
; CHECK-NEXT:    retq
  %cmp = icmp slt <4 x i32> %A, %B
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp sle <4 x i32> %sext, zeroinitializer
  %t0 = extractelement <4 x i1> %cmp1, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}

define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_ge_1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    notl %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_ge_1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE41-NEXT:    pextrd $1, %xmm1, %eax
; SSE41-NEXT:    notl %eax
; SSE41-NEXT:    retq
  %cmp = icmp slt <4 x i32> %A, %B
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp sge <4 x i32> %sext, zeroinitializer
  %t0 = extractelement <4 x i1> %cmp1, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}

define i32 @test_lt_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_lt_1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_lt_1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE41-NEXT:    pextrd $1, %xmm1, %eax
; SSE41-NEXT:    retq
  %cmp = icmp slt <4 x i32> %A, %B
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp slt <4 x i32> %sext, zeroinitializer
  %t0 = extractelement <4 x i1> %cmp, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}

define i32 @test_gt_1(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_gt_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %cmp = icmp slt <4 x i32> %A, %B
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp sgt <4 x i32> %sext, zeroinitializer
  %t0 = extractelement <4 x i1> %cmp1, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}

define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_eq_2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    notl %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_eq_2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    pextrd $1, %xmm0, %eax
; SSE41-NEXT:    notl %eax
; SSE41-NEXT:    retq
  %cmp = icmp slt <4 x i32> %B, %A
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp eq <4 x i32> %sext, zeroinitializer
  %t0 = extractelement <4 x i1> %cmp1, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}

define i32 @test_ne_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_ne_2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_ne_2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    pextrd $1, %xmm0, %eax
; SSE41-NEXT:    retq
  %cmp = icmp slt <4 x i32> %B, %A
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp ne <4 x i32> %sext, zeroinitializer
  %t0 = extractelement <4 x i1> %cmp1, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}

define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_le_2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    notl %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_le_2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    pextrd $1, %xmm0, %eax
; SSE41-NEXT:    notl %eax
; SSE41-NEXT:    retq
  %cmp = icmp slt <4 x i32> %B, %A
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp sle <4 x i32> zeroinitializer, %sext
  %t0 = extractelement <4 x i1> %cmp1, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}

define i32 @test_ge_2(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_ge_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl $-1, %eax
; CHECK-NEXT:    retq
  %cmp = icmp slt <4 x i32> %B, %A
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp sge <4 x i32> zeroinitializer, %sext
  %t0 = extractelement <4 x i1> %cmp1, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}

define i32 @test_lt_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_lt_2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_lt_2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    pextrd $1, %xmm0, %eax
; SSE41-NEXT:    retq
  %cmp = icmp slt <4 x i32> %B, %A
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp slt <4 x i32> zeroinitializer, %sext
  %t0 = extractelement <4 x i1> %cmp, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}

define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_gt_2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_gt_2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    pextrd $1, %xmm0, %eax
; SSE41-NEXT:    retq
  %cmp = icmp slt <4 x i32> %B, %A
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp sgt <4 x i32> zeroinitializer, %sext
  %t0 = extractelement <4 x i1> %cmp1, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}

; (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
; Don't combine with i1 - out of range constant
define void @test_i1_uge(i1 *%A2) {
; CHECK-LABEL: test_i1_uge:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb (%rdi), %al
; CHECK-NEXT:    movl %eax, %ecx
; CHECK-NEXT:    xorb $1, %cl
; CHECK-NEXT:    andb %cl, %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    negq %rax
; CHECK-NEXT:    andb $1, %cl
; CHECK-NEXT:    movb %cl, (%rdi,%rax)
; CHECK-NEXT:    retq
  %L5 = load i1, i1* %A2
  %C3 = icmp ne i1 %L5, true
  %C8 = icmp eq i1 %L5, false
  %C9 = icmp ugt i1 %C3, %C8
  %G3 = getelementptr i1, i1* %A2, i1 %C9
  store i1 %C3, i1* %G3
  ret void
}

; This should not get folded to 0.

define i64 @PR40657(i8 %var2, i8 %var9) {
; CHECK-LABEL: PR40657:
; CHECK:       # %bb.0:
; CHECK-NEXT:    notb %sil
; CHECK-NEXT:    addb %dil, %sil
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
  %var6 = trunc i8 %var9 to i1
  %var7 = trunc i8 175 to i1
  %var3 = sub nsw i1 %var6, %var7
  %var4 = icmp eq i64 1114591064, 1114591064
  %var1 = udiv i1 %var3, %var4
  %var0 = trunc i8 %var2 to i1
  %res = sub nsw nuw i1 %var0, %var1
  %res.cast = zext i1 %res to i64
  ret i64 %res.cast
}

; This should not get folded to 0.

define i64 @PR40657_commute(i8 %var7, i8 %var8, i8 %var9) {
; CHECK-LABEL: PR40657_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subb %dil, %sil
; CHECK-NEXT:    subb %sil, %dl
; CHECK-NEXT:    subb %dl, %sil
; CHECK-NEXT:    xorb %dl, %sil
; CHECK-NEXT:    subb %sil, %dl
; CHECK-NEXT:    movzbl %dl, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
  %var4 = trunc i8 %var9 to i1
  %var5 = trunc i8 %var8 to i1
  %var6 = trunc i8 %var7 to i1
  %var3 = sub nsw nuw i1 %var5, %var6
  %var0 = sub nuw i1 %var4, %var3
  %var2 = sub i1 %var3, %var0
  %var1 = icmp ne i1 %var0, %var2
  %res = sub nsw nuw i1 %var0, %var1
  %res.cast = zext i1 %res to i64
  ret i64 %res.cast
}

define i64 @sub_to_shift_to_add(i32 %x, i32 %y, i64 %s1, i64 %s2) {
; CHECK-LABEL: sub_to_shift_to_add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdx, %rax
; CHECK-NEXT:    addl %esi, %esi
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    cmovneq %rcx, %rax
; CHECK-NEXT:    retq
  %sub = sub i32 %x, %y
  %cmp = icmp eq i32 %sub, %y
  %r = select i1 %cmp, i64 %s1, i64 %s2
  ret i64 %r
}

define <4 x float> @sub_to_shift_to_add_vec(<4 x i32> %x, <4 x i32> %y, <4 x float> %s1, <4 x float> %s2) {
; SSE2-LABEL: sub_to_shift_to_add_vec:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddd %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sub_to_shift_to_add_vec:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddd %xmm1, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    movaps %xmm3, %xmm0
; SSE41-NEXT:    retq
  %sub = sub <4 x i32> %x, %y
  %cmp = icmp eq <4 x i32> %sub, %y
  %r = select <4 x i1> %cmp, <4 x float> %s1, <4 x float> %s2
  ret <4 x float> %r
}

define i64 @sub_constant_to_shift_to_add(i32 %x, i64 %s1, i64 %s2) {
; CHECK-LABEL: sub_constant_to_shift_to_add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    addl %edi, %edi
; CHECK-NEXT:    cmpl $42, %edi
; CHECK-NEXT:    cmovneq %rdx, %rax
; CHECK-NEXT:    retq
  %sub = sub i32 42, %x
  %cmp = icmp eq i32 %sub, %x
  %r = select i1 %cmp, i64 %s1, i64 %s2
  ret i64 %r
}