; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s

define i32 @and_self(i32 %x) {
; CHECK-LABEL: and_self:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %and = and i32 %x, %x
  ret i32 %and
}

define <4 x i32> @and_self_vec(<4 x i32> %x) {
; CHECK-LABEL: and_self_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %and = and <4 x i32> %x, %x
  ret <4 x i32> %and
}

;
; Verify that the DAGCombiner is able to fold a vector AND into a blend
; if one of the operands to the AND is a vector of all constants, and each
; constant element is either zero or all-ones.
;
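; For example, @test1 below ANDs with <i32 -1, i32 0, i32 0, i32 0>, which keeps
; only lane 0: the expected lowering is a blend of %A with a zeroed register
; (xorps + blendps) rather than an AND with a constant-pool mask.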

define <4 x i32> @test1(<4 x i32> %A) {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test2(<4 x i32> %A) {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test3(<4 x i32> %A) {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test4(<4 x i32> %A) {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test5(<4 x i32> %A) {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test6(<4 x i32> %A) {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test7(<4 x i32> %A) {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test8(<4 x i32> %A) {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test9(<4 x i32> %A) {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test10(<4 x i32> %A) {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test11(<4 x i32> %A) {
; CHECK-LABEL: test11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test12(<4 x i32> %A) {
; CHECK-LABEL: test12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test13(<4 x i32> %A) {
; CHECK-LABEL: test13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test14(<4 x i32> %A) {
; CHECK-LABEL: test14:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test15:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
  %2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 0>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
  %2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 -1>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test17:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
  %2 = and <4 x i32> %B, <i32 -1, i32 0, i32 -1, i32 0>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

;
; fold (and (or x, C), D) -> D if (C & D) == D
;
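; For example, in @and_or_v2i64 below C = 255 and D = 8, so C & D == 8 == D and
; the whole expression is expected to fold to the constant vector <8, 8>.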

define <2 x i64> @and_or_v2i64(<2 x i64> %a0) {
; CHECK-LABEL: and_or_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [8,8]
; CHECK-NEXT:    retq
  %1 = or <2 x i64> %a0, <i64 255, i64 255>
  %2 = and <2 x i64> %1, <i64 8, i64 8>
  ret <2 x i64> %2
}

define <4 x i32> @and_or_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: and_or_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
; CHECK-NEXT:    retq
  %1 = or <4 x i32> %a0, <i32 15, i32 15, i32 15, i32 15>
  %2 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <8 x i16> @and_or_v8i16(<8 x i16> %a0) {
; CHECK-LABEL: and_or_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [15,7,3,1,14,10,2,32767]
; CHECK-NEXT:    retq
  %1 = or <8 x i16> %a0, <i16 255, i16 127, i16 63, i16 31, i16 15, i16 31, i16 63, i16 -1>
  %2 = and <8 x i16> %1, <i16 15, i16 7, i16 3, i16 1, i16 14, i16 10, i16 2, i16 32767>
  ret <8 x i16> %2
}

;
; known bits folding
;

define <2 x i64> @and_or_zext_v2i32(<2 x i32> %a0) {
; CHECK-LABEL: and_or_zext_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = zext <2 x i32> %a0 to <2 x i64>
  %2 = or <2 x i64> %1, <i64 1, i64 1>
  %3 = and <2 x i64> %2, <i64 4294967296, i64 4294967296>
  ret <2 x i64> %3
}

define <4 x i32> @and_or_zext_v4i16(<4 x i16> %a0) {
; CHECK-LABEL: and_or_zext_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = zext <4 x i16> %a0 to <4 x i32>
  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = and <4 x i32> %2, <i32 65536, i32 65536, i32 65536, i32 65536>
  ret <4 x i32> %3
}

;
; known sign bits folding
;

define <8 x i16> @ashr_mask1_v8i16(<8 x i16> %a0) {
; CHECK-LABEL: ashr_mask1_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrlw $15, %xmm0
; CHECK-NEXT:    retq
  %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %2
}

define <4 x i32> @ashr_mask7_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: ashr_mask7_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrad $31, %xmm0
; CHECK-NEXT:    psrld $29, %xmm0
; CHECK-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31>
  %2 = and <4 x i32> %1, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %2
}

;
; SimplifyDemandedBits
;

; PR34620 - redundant PAND after vector shift of a byte vector (PSRLW)
define <16 x i8> @PR34620(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: PR34620:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrlw $1, %xmm0
; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
; CHECK-NEXT:    paddb %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = lshr <16 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %3 = add <16 x i8> %2, %a1
  ret <16 x i8> %3
}