; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s

; OR of a scalar with itself; the expected code only copies the argument.
define i32 @or_self(i32 %x) {
; CHECK-LABEL: or_self:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
  %same = or i32 %x, %x
  ret i32 %same
}

; Vector form of the above; the expected code is a bare return.
define <4 x i32> @or_self_vec(<4 x i32> %x) {
; CHECK-LABEL: or_self_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
  %same = or <4 x i32> %x, %x
  ret <4 x i32> %same
}

; Each of the following cases ORs two values whose non-zero lanes do not
; overlap, and is expected to become a single blend instruction.

; Lane 0 is taken from %a, lane 1 from %b; the remaining lane of each shuffle
; reads the zero vector.
define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %rhs = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %merged = or <2 x i64> %lhs, %rhs
  ret <2 x i64> %merged
}


; Lanes 0-1 come from %b, lanes 2-3 from %a.
define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %rhs = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}


; Lane 0 comes from %b, lane 1 from %a.
define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %rhs = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %merged = or <2 x i64> %lhs, %rhs
  ret <2 x i64> %merged
}


; Lane 0 comes from %a, lanes 1-3 from %b.
define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %rhs = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}


; Lane 0 comes from %b, lanes 1-3 from %a.
define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %rhs = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}


; Lanes 0-1 come from %a, lanes 2-3 from %b.
define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %rhs = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}


; Same lane selection as test6, expressed with complementary AND masks
; instead of shuffles against zero.
define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: retq
  %keep_a = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
  %keep_b = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
  %merged = or <4 x i32> %keep_a, %keep_b
  ret <4 x i32> %merged
}


; AND-mask form: lane 0 kept from %a, lane 1 kept from %b.
define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test8:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: retq
  %keep_a = and <2 x i64> %a, <i64 -1, i64 0>
  %keep_b = and <2 x i64> %b, <i64 0, i64 -1>
  %merged = or <2 x i64> %keep_a, %keep_b
  ret <2 x i64> %merged
}


; AND-mask form: lanes 2-3 kept from %a, lanes 0-1 kept from %b.
define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test9:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
  %keep_a = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
  %keep_b = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
  %merged = or <4 x i32> %keep_a, %keep_b
  ret <4 x i32> %merged
}


; AND-mask form: lane 1 kept from %a, lane 0 kept from %b.
define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
  %keep_a = and <2 x i64> %a, <i64 0, i64 -1>
  %keep_b = and <2 x i64> %b, <i64 -1, i64 0>
  %merged = or <2 x i64> %keep_a, %keep_b
  ret <2 x i64> %merged
}


; AND-mask form: lane 0 kept from %a, lanes 1-3 kept from %b.
define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test11:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: retq
  %keep_a = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
  %keep_b = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
  %merged = or <4 x i32> %keep_a, %keep_b
  ret <4 x i32> %merged
}


; AND-mask form: lanes 1-3 kept from %a, lane 0 kept from %b.
define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test12:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: retq
  %keep_a = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
  %keep_b = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
  %merged = or <4 x i32> %keep_a, %keep_b
  ret <4 x i32> %merged
}


; Verify that the following test cases are folded into single shuffles.

; Lanes 0-1 both read element 1 of %a; lanes 2-3 come from %b unchanged.
define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test13:
; CHECK: # %bb.0:
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 4, i32 4>
  %rhs = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}


; Lane 0 is element 0 of %a, lane 1 is element 0 of %b.
define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test14:
; CHECK: # %bb.0:
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
  %lhs = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %rhs = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %merged = or <2 x i64> %lhs, %rhs
  ret <2 x i64> %merged
}


; Lanes 0-1 are elements 2,1 of %b; lanes 2-3 are elements 2,1 of %a.
define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test15:
; CHECK: # %bb.0:
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,1],xmm0[2,1]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 1>
  %rhs = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}


; Lane 0 is element 0 of %b, lane 1 is element 0 of %a.
define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test16:
; CHECK: # %bb.0:
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %lhs = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %rhs = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %merged = or <2 x i64> %lhs, %rhs
  ret <2 x i64> %merged
}


; Verify that the dag-combiner does not fold an OR of two shuffles into a
; single shuffle instruction when the shuffle indexes are not compatible.

; Lane 1 is written by both shuffles (element 0 of %a and element 1 of %b),
; so an explicit por is expected to remain.
define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test17:
; CHECK: # %bb.0:
; CHECK-NEXT: psllq $32, %xmm0
; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 2>
  %rhs = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}


; Element 0 of %a lands in lane 1 and element 0 of %b in lane 0; the expected
; code keeps a por.
define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test18:
; CHECK: # %bb.0:
; CHECK-NEXT: pxor %xmm2, %xmm2
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 4>
  %rhs = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}


; Lane 3 is written by both shuffles (element 3 of %a and element 2 of %b).
define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test19:
; CHECK: # %bb.0:
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,3]
; CHECK-NEXT: pxor %xmm3, %xmm3
; CHECK-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5,6,7]
; CHECK-NEXT: por %xmm2, %xmm0
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 3>
  %rhs = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 2>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}


; Both shuffles use the identical mask (element 0 kept, lane 1 zero).
define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test20:
; CHECK: # %bb.0:
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: retq
  %lhs = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %rhs = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %merged = or <2 x i64> %lhs, %rhs
  ret <2 x i64> %merged
}


; Both shuffles use the identical mask (lane 0 zero, element 0 moved to lane 1).
define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test21:
; CHECK: # %bb.0:
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
  %lhs = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %rhs = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %merged = or <2 x i64> %lhs, %rhs
  ret <2 x i64> %merged
}


; Verify that the dag-combiner keeps the correct domain for float/double vectors
; bitcast to use the mask-or blend combine.

; <2 x double> arguments are bitcast to <2 x i64>, masked with complementary
; constants, ORed, and bitcast back.
define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test22:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
  %a0_int = bitcast <2 x double> %a0 to <2 x i64>
  %a1_int = bitcast <2 x double> %a1 to <2 x i64>
  %keep_a0 = and <2 x i64> %a0_int, <i64 0, i64 -1>
  %keep_a1 = and <2 x i64> %a1_int, <i64 -1, i64 0>
  %merged = or <2 x i64> %keep_a0, %keep_a1
  %result = bitcast <2 x i64> %merged to <2 x double>
  ret <2 x double> %result
}


; <4 x float> arguments masked as <4 x i32>: lanes 1-2 kept from %a0,
; lanes 0 and 3 kept from %a1.
define <4 x float> @test23(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test23:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; CHECK-NEXT: retq
  %a0_int = bitcast <4 x float> %a0 to <4 x i32>
  %a1_int = bitcast <4 x float> %a1 to <4 x i32>
  %keep_a0 = and <4 x i32> %a0_int, <i32 0, i32 -1, i32 -1, i32 0>
  %keep_a1 = and <4 x i32> %a1_int, <i32 -1, i32 0, i32 0, i32 -1>
  %merged = or <4 x i32> %keep_a0, %keep_a1
  %result = bitcast <4 x i32> %merged to <4 x float>
  ret <4 x float> %result
}


; <4 x float> arguments masked through a <2 x i64> view.
define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test24:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
  %a0_int = bitcast <4 x float> %a0 to <2 x i64>
  %a1_int = bitcast <4 x float> %a1 to <2 x i64>
  %keep_a0 = and <2 x i64> %a0_int, <i64 0, i64 -1>
  %keep_a1 = and <2 x i64> %a1_int, <i64 -1, i64 0>
  %merged = or <2 x i64> %keep_a0, %keep_a1
  %result = bitcast <2 x i64> %merged to <4 x float>
  ret <4 x float> %result
}


; Same masks as test23, but the second input is a constant vector of 1.0;
; the expected blend reads it from memory.
define <4 x float> @test25(<4 x float> %a0) {
; CHECK-LABEL: test25:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = mem[0],xmm0[1,2],mem[3]
; CHECK-NEXT: retq
  %a0_int = bitcast <4 x float> %a0 to <4 x i32>
  %ones_int = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32>
  %keep_a0 = and <4 x i32> %a0_int, <i32 0, i32 -1, i32 -1, i32 0>
  %keep_ones = and <4 x i32> %ones_int, <i32 -1, i32 0, i32 0, i32 -1>
  %merged = or <4 x i32> %keep_a0, %keep_ones
  %result = bitcast <4 x i32> %merged to <4 x float>
  ret <4 x float> %result
}


; Verify that the DAGCombiner doesn't crash while checking whether a shuffle
; with an illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows
; how to handle legal vector value types.
define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_crash:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %rhs = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %merged = or <4 x i8> %lhs, %rhs
  ret <4 x i8> %merged
}

; Verify that we can fold regardless of which operand is the zeroinitializer

; The zero vector is the first shuffle operand for %a only.
define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2b:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 6, i32 7>
  %rhs = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}

; The zero vector is the first shuffle operand for both %a and %b.
define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2c:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 6, i32 7>
  %rhs = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 0>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}


; The zero vector is the first shuffle operand for %b only.
define <4 x i32> @test2d(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2d:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %rhs = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 0>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}

; Make sure we can have an undef where an index pointing to the zero vector should be

; The undef mask index is in the shuffle of %a (lane 0).
define <4 x i32> @test2e(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2e:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32> <i32 undef, i32 4, i32 2, i32 3>
  %rhs = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}

; The undef mask index is in the shuffle of %b (lane 0).
define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2f:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
  %lhs = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %rhs = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32> <i32 undef, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %merged
}

; (or (and X, c1), c2) -> (and (or X, c2), c1|c2) iff (c1 & c2) != 0

; Splat constants: c1 = 7, c2 = 3.
define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
; CHECK-LABEL: or_and_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
  %masked = and <2 x i64> %a0, <i64 7, i64 7>
  %res = or <2 x i64> %masked, <i64 3, i64 3>
  ret <2 x i64> %res
}

; Non-splat constants, different in every lane.
define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: or_and_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
  %masked = and <4 x i32> %a0, <i32 1, i32 3, i32 5, i32 7>
  %res = or <4 x i32> %masked, <i32 3, i32 2, i32 15, i32 2>
  ret <4 x i32> %res
}

; If all masked bits are going to be set, that's a constant fold.

define <4 x i32> @or_and_v4i32_fold(<4 x i32> %a0) {
; CHECK-LABEL: or_and_v4i32_fold:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [3,3,3,3]
; CHECK-NEXT: retq
  %masked = and <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
  %res = or <4 x i32> %masked, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %res
}

; fold (or x, c) -> c iff (x & ~c) == 0

; i32 lanes zero-extended to i64, then ORed with 4294967295 in every lane.
define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
; CHECK-LABEL: or_zext_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295]
; CHECK-NEXT: retq
  %wide = zext <2 x i32> %a0 to <2 x i64>
  %res = or <2 x i64> %wide, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %res
}

; i16 lanes zero-extended to i32, then ORed with 65535 in every lane.
define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
; CHECK-LABEL: or_zext_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
; CHECK-NEXT: retq
  %wide = zext <4 x i16> %a0 to <4 x i32>
  %res = or <4 x i32> %wide, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %res
}