; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s

define i32 @and_self(i32 %x) {
; CHECK-LABEL: and_self:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %and = and i32 %x, %x
  ret i32 %and
}

define <4 x i32> @and_self_vec(<4 x i32> %x) {
; CHECK-LABEL: and_self_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %and = and <4 x i32> %x, %x
  ret <4 x i32> %and
}

;
; Verify that the DAGCombiner is able to fold a vector AND into a blend
; if one of the operands to the AND is a vector of all constants, and each
; constant element is either zero or all-ones.
;

define <4 x i32> @test1(<4 x i32> %A) {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test2(<4 x i32> %A) {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test3(<4 x i32> %A) {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test4(<4 x i32> %A) {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test5(<4 x i32> %A) {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test6(<4 x i32> %A) {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test7(<4 x i32> %A) {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test8(<4 x i32> %A) {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test9(<4 x i32> %A) {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test10(<4 x i32> %A) {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test11(<4 x i32> %A) {
; CHECK-LABEL: test11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test12(<4 x i32> %A) {
; CHECK-LABEL: test12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test13(<4 x i32> %A) {
; CHECK-LABEL: test13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test14(<4 x i32> %A) {
; CHECK-LABEL: test14:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
  ret <4 x i32> %1
}

; X & undef must fold to 0. So lane 0 must choose from the zero vector.

define <4 x i32> @undef_lane(<4 x i32> %x) {
; CHECK-LABEL: undef_lane:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT:    retq
  %r = and <4 x i32> %x, <i32 undef, i32 4294967295, i32 0, i32 4294967295>
  ret <4 x i32> %r
}

define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test15:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
  %2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 0>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
  %2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 -1>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test17:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
  %2 = and <4 x i32> %B, <i32 -1, i32 0, i32 -1, i32 0>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

;
; fold (and (or x, C), D) -> D if (C & D) == D
;

define <2 x i64> @and_or_v2i64(<2 x i64> %a0) {
; CHECK-LABEL: and_or_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [8,8]
; CHECK-NEXT:    retq
  %1 = or <2 x i64> %a0, <i64 255, i64 255>
  %2 = and <2 x i64> %1, <i64 8, i64 8>
  ret <2 x i64> %2
}

define <4 x i32> @and_or_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: and_or_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
; CHECK-NEXT:    retq
  %1 = or <4 x i32> %a0, <i32 15, i32 15, i32 15, i32 15>
  %2 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <8 x i16> @and_or_v8i16(<8 x i16> %a0) {
; CHECK-LABEL: and_or_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [15,7,3,1,14,10,2,32767]
; CHECK-NEXT:    retq
  %1 = or <8 x i16> %a0, <i16 255, i16 127, i16 63, i16 31, i16 15, i16 31, i16 63, i16 -1>
  %2 = and <8 x i16> %1, <i16 15, i16 7, i16 3, i16 1, i16 14, i16 10, i16 2, i16 32767>
  ret <8 x i16> %2
}

;
; known bits folding
;

define <2 x i64> @and_or_zext_v2i32(<2 x i32> %a0) {
; CHECK-LABEL: and_or_zext_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = zext <2 x i32> %a0 to <2 x i64>
  %2 = or <2 x i64> %1, <i64 1, i64 1>
  %3 = and <2 x i64> %2, <i64 4294967296, i64 4294967296>
  ret <2 x i64> %3
}

define <4 x i32> @and_or_zext_v4i16(<4 x i16> %a0) {
; CHECK-LABEL: and_or_zext_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = zext <4 x i16> %a0 to <4 x i32>
  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = and <4 x i32> %2, <i32 65536, i32 65536, i32 65536, i32 65536>
  ret <4 x i32> %3
}

;
; known sign bits folding
;

define <8 x i16> @ashr_mask1_v8i16(<8 x i16> %a0) {
; CHECK-LABEL: ashr_mask1_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrlw $15, %xmm0
; CHECK-NEXT:    retq
  %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %2
}

define <4 x i32> @ashr_mask7_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: ashr_mask7_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrad $31, %xmm0
; CHECK-NEXT:    psrld $29, %xmm0
; CHECK-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31>
  %2 = and <4 x i32> %1, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %2
}

;
; SimplifyDemandedBits
;

; PR34620 - redundant PAND after vector shift of a byte vector (PSRLW)
define <16 x i8> @PR34620(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: PR34620:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrlw $1, %xmm0
; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
; CHECK-NEXT:    paddb %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = lshr <16 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %3 = add <16 x i8> %2, %a1
  ret <16 x i8> %3
}
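
;
; The extra case below is not part of the original test; it is a minimal,
; hypothetical sketch of the same fold (and (or x, C), D) -> D applied
; per-lane to a v16i8 splat, with C = 15 and D = 3 so that (C & D) == D.
; No CHECK lines are provided here; they would need to be generated with
; utils/update_llc_test_checks.py before this case could verify codegen.
;

define <16 x i8> @and_or_v16i8_sketch(<16 x i8> %a0) {
  %1 = or <16 x i8> %a0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
  %2 = and <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %2
}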