; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s

; Reading guide for the shuffle masks in this file: in a shufflevector with
; N-element operands, mask indices 0..N-1 pick lanes of the first operand and
; indices N..2N-1 pick lanes of the second operand. Wherever the second operand
; is zeroinitializer, an index >= N therefore yields a zero lane.

; Verify that each of the following test cases is folded into a single
; instruction which performs a blend operation.

; OR of two zero-padded <2 x i64> shuffles: <a[0], 0> | <0, b[1]>, i.e. the
; result is <a[0], b[1]>. The assertions expect exactly one pblendw.
define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; <0, 0, a[2], a[3]> | <b[0], b[1], 0, 0>: low half from %b, high half from %a.
define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; <2 x i64> counterpart of test2: <0, a[1]> | <b[0], 0>.
define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; <a[0], 0, 0, 0> | <0, b[1], b[2], b[3]>: only lane 0 comes from %a.
define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test4:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Mirror of test4: <0, a[1], a[2], a[3]> | <b[0], 0, 0, 0>.
define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test5:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Mirror of test2: <a[0], a[1], 0, 0> | <0, 0, b[2], b[3]>.
define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test6:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; test7..test12 build the same lane selections with AND against constant lane
; masks instead of shuffles: a -1 (all-ones) lane keeps the input lane, a 0 lane
; clears it. The two masks in each test are complementary.
; Here: lanes 0-1 from %a, lanes 2-3 from %b (same selection as test6).
define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test7:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT: retq
  %and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}


; AND-mask form of test1: lane 0 from %a, lane 1 from %b.
define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test8:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT: retq
  %and1 = and <2 x i64> %a, <i64 -1, i64 0>
  %and2 = and <2 x i64> %b, <i64 0, i64 -1>
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}


; AND-mask form of test2: lanes 0-1 from %b, lanes 2-3 from %a.
define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test9:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
  %and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
  %and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}


; AND-mask form of test3: lane 0 from %b, lane 1 from %a.
define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test10:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
  %and1 = and <2 x i64> %a, <i64 0, i64 -1>
  %and2 = and <2 x i64> %b, <i64 -1, i64 0>
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}


; AND-mask form of test4: lane 0 from %a, lanes 1-3 from %b.
define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test11:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT: retq
  %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}


; AND-mask form of test5: lane 0 from %b, lanes 1-3 from %a.
define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test12:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; CHECK-NEXT: retq
  %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
  %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}


; Verify that the following test cases are folded into single shuffles.

; <a[1], a[1], 0, 0> | <0, 0, b[2], b[3]>: the %a lanes are permuted (lane 1 is
; duplicated), so this is not a plain in-place blend.
; NOTE(review): the section comment preceding this test says "single shuffles",
; but the assertions here list two instructions (pshufd + pblendw) - confirm
; whether that header is stale.
define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test13:
; CHECK: # BB#0:
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; <a[0], 0> | <0, b[0]>: the result is <a[0], b[0]>, expected as one punpcklqdq.
define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test14:
; CHECK: # BB#0:
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; <0, 0, a[2], a[1]> | <b[2], b[1], 0, 0>: both inputs are permuted before being
; combined. The assertions list three instructions (two pshufd and a pblendw)
; and no por.
define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test15:
; CHECK: # BB#0:
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,2,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,1,2,3]
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Mirror of test14: <0, a[0]> | <b[0], 0>, i.e. <b[0], a[0]>. The unpack writes
; xmm1, so the expected output ends with a register copy back into xmm0.
define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test16:
; CHECK: # BB#0:
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; Verify that the dag-combiner does not fold an OR of two shuffles into a single
; shuffle instruction when the shuffle indexes are not compatible.

; <0, a[0], 0, a[2]> | <b[0], b[1], 0, 0>: lane 1 receives a value from both
; inputs (a[0] and b[1]), so a real OR is needed; the assertions keep a por.
define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test17:
; CHECK: # BB#0:
; CHECK-NEXT: psllq $32, %xmm0
; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; <0, a[0], 0, 0> | <b[0], 0, 0, 0>: lanes 2 and 3 are zero in both shuffles.
; The assertions keep a por.
; NOTE(review): presumably left unfolded because the result needs zero lanes
; that neither %a nor %b supplies - confirm against the combine's conditions.
define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test18:
; CHECK: # BB#0:
; CHECK-NEXT: pxor %xmm2, %xmm2
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; <0, a[0], 0, a[3]> | <b[0], 0, b[2], b[2]>: lane 3 receives a value from both
; inputs (a[3] and b[2]); the assertions keep a por.
define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test19:
; CHECK: # BB#0:
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,3]
; CHECK-NEXT: pxor %xmm3, %xmm3
; CHECK-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5,6,7]
; CHECK-NEXT: por %xmm2, %xmm0
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Both shuffles use the same mask: <a[0], 0> | <b[0], 0>. Lane 0 is populated by
; both inputs. The expected output ORs the full registers first and then clears
; the upper lane with movq.
define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test20:
; CHECK: # BB#0:
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; Same idea as test20 with the value moved to the upper lane:
; <0, a[0]> | <0, b[0]>. The expected output is a por followed by a byte shift
; (pslldq) that zeroes the low lane.
define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test21:
; CHECK: # BB#0:
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; Verify that the dag-combiner keeps the correct domain for float/double vectors
; bitcast to use the mask-or blend combine.

; <2 x double> inputs are bitcast to <2 x i64>, combined with complementary AND
; lane masks (lane 0 from %a1, lane 1 from %a0) and bitcast back. The assertions
; expect the floating-point blend blendpd rather than an integer pblendw.
define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test22:
; CHECK: # BB#0:
; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT: retq
  %bc1 = bitcast <2 x double> %a0 to <2 x i64>
  %bc2 = bitcast <2 x double> %a1 to <2 x i64>
  %and1 = and <2 x i64> %bc1, <i64 0, i64 -1>
  %and2 = and <2 x i64> %bc2, <i64 -1, i64 0>
  %or = or <2 x i64> %and1, %and2
  %bc3 = bitcast <2 x i64> %or to <2 x double>
  ret <2 x double> %bc3
}


; <4 x float> variant using <4 x i32> masks: lanes 1-2 from %a0, lanes 0 and 3
; from %a1. The assertions expect a single blendps.
define <4 x float> @test23(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test23:
; CHECK: # BB#0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; CHECK-NEXT: retq
  %bc1 = bitcast <4 x float> %a0 to <4 x i32>
  %bc2 = bitcast <4 x float> %a1 to <4 x i32>
  %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0>
  %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1>
  %or = or <4 x i32> %and1, %and2
  %bc3 = bitcast <4 x i32> %or to <4 x float>
  ret <4 x float> %bc3
}


; <4 x float> inputs masked at 64-bit granularity (bitcast to <2 x i64>):
; low 64 bits from %a1, high 64 bits from %a0. The assertions expect blendpd.
define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test24:
; CHECK: # BB#0:
; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT: retq
  %bc1 = bitcast <4 x float> %a0 to <2 x i64>
  %bc2 = bitcast <4 x float> %a1 to <2 x i64>
  %and1 = and <2 x i64> %bc1, <i64 0, i64 -1>
  %and2 = and <2 x i64> %bc2, <i64 -1, i64 0>
  %or = or <2 x i64> %and1, %and2
  %bc3 = bitcast <2 x i64> %or to <4 x float>
  ret <4 x float> %bc3
}


; Same lane selection as test23, but the second input is the constant vector
; <1.0, 1.0, 1.0, 1.0>. The assertions expect a single blendps whose lanes 0 and
; 3 come from a memory operand.
define <4 x float> @test25(<4 x float> %a0) {
; CHECK-LABEL: test25:
; CHECK: # BB#0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = mem[0],xmm0[1,2],mem[3]
; CHECK-NEXT: retq
  %bc1 = bitcast <4 x float> %a0 to <4 x i32>
  %bc2 = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32>
  %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0>
  %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1>
  %or = or <4 x i32> %and1, %and2
  %bc3 = bitcast <4 x i32> %or to <4 x float>
  ret <4 x float> %bc3
}


; Verify that the DAGCombiner doesn't crash in the attempt to check if a shuffle
; with illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows how to
; handle legal vector value types.
; The shuffle masks are the same as in test2, applied to <4 x i8> vectors.
define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_crash:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i8> %shuf1, %shuf2
  ret <4 x i8> %or
}

; Verify that we can fold regardless of which operand is the zeroinitializer

; test2 with the zero vector as the FIRST operand of %shuf1: indices 0-3 now
; select zero lanes and indices 6, 7 select a[2], a[3].
define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2b:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32><i32 0, i32 0, i32 6, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

; test2 with the zero vector as the first operand of BOTH shuffles.
define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2c:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32><i32 0, i32 0, i32 6, i32 7>
  %shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32><i32 4, i32 5, i32 0, i32 0>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; test2 with the zero vector as the first operand of %shuf2 only.
define <4 x i32> @test2d(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2d:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32><i32 4, i32 5, i32 0, i32 0>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

; Make sure we can have an undef where an index pointing to the zero vector should be

; Here the second shuffle operand is <0, undef, undef, undef> instead of
; zeroinitializer, and lane 0 of the %shuf1 mask is undef rather than an index
; into that vector. The expected output is the same pblendw as for test2.
define <4 x i32> @test2e(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2e:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 undef, i32 4, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

; Counterpart of test2e with the undef index in the %shuf2 mask (lane 0), which
; is a lane that test2 takes from %b rather than from the zero vector.
define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2f:
; CHECK: # BB#0:
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 undef, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}