; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s

; Verify that the DAGCombiner correctly folds according to the following rules:

; fold (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
; fold (OR  (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C)
; fold (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)

; fold (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
; fold (OR  (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B))
; fold (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
;
; V_0 denotes the all-zeros vector: a lane that both shuffles take from C
; becomes (C xor C) = 0 after the XOR.
;
; In every test below the two shuffles use the same mask, and the expected
; output has the logic operation first, followed by a single shuffle.
; Each function label is matched with its trailing ':' so that, for example,
; the pattern for test1 cannot also match test1b or test1c.


; test1 - test3: mask <0, 2, 1, 3> reads only lanes of the first shuffle
; operand (%a / %b).

define <4 x i32> @test1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}
; CHECK-LABEL: test1:
; CHECK-NOT: pshufd
; CHECK: pand
; CHECK-NEXT: pshufd
; CHECK-NEXT: ret


define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test2:
; CHECK-NOT: pshufd
; CHECK: por
; CHECK-NEXT: pshufd
; CHECK-NEXT: ret


define <4 x i32> @test3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}
; CHECK-LABEL: test3:
; CHECK-NOT: pshufd
; CHECK: pxor
; CHECK-NEXT: pshufd
; CHECK-NEXT: ret


; test4 - test6: mask <4, 6, 5, 7> reads only lanes of the second shuffle
; operand (%a / %b); the shared operand %c is now the first operand.

define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32> <i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32> <i32 4, i32 6, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}
; CHECK-LABEL: test4:
; CHECK-NOT: pshufd
; CHECK: pand
; CHECK-NEXT: pshufd
; CHECK-NEXT: ret


define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32> <i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32> <i32 4, i32 6, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test5:
; CHECK-NOT: pshufd
; CHECK: por
; CHECK-NEXT: pshufd
; CHECK-NEXT: ret


define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32> <i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32> <i32 4, i32 6, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}
; CHECK-LABEL: test6:
; CHECK-NOT: pshufd
; CHECK: pxor
; CHECK-NEXT: pshufd
; CHECK-NEXT: ret


; Verify that the DAGCombiner moves the shuffle after the xor/and/or even if
; the shuffles are not performing swizzle operations.
;
; test1b - test6b: mask <0, 5, 2, 7> takes lanes 0 and 2 from the first
; operand and lanes 1 and 3 from the second operand.
; NOTE(review): the additional xorps expected in the XOR variants presumably
; materializes the zero vector V_0 - confirm against the llc output.

define <4 x i32> @test1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}
; CHECK-LABEL: test1b:
; CHECK-NOT: blendps
; CHECK: andps
; CHECK-NEXT: blendps
; CHECK-NEXT: ret


define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test2b:
; CHECK-NOT: blendps
; CHECK: orps
; CHECK-NEXT: blendps
; CHECK-NEXT: ret


define <4 x i32> @test3b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}
; CHECK-LABEL: test3b:
; CHECK-NOT: blendps
; CHECK: xorps
; CHECK-NEXT: xorps
; CHECK-NEXT: blendps
; CHECK-NEXT: ret


; In test4b - test6b the shared operand %c is the first shuffle operand.
; The final ret is matched loosely here (it need not be on the very next line).
; NOTE(review): presumably this tolerates a register move after the blend - confirm.

define <4 x i32> @test4b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}
; CHECK-LABEL: test4b:
; CHECK-NOT: blendps
; CHECK: andps
; CHECK-NEXT: blendps
; CHECK: ret


define <4 x i32> @test5b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test5b:
; CHECK-NOT: blendps
; CHECK: orps
; CHECK-NEXT: blendps
; CHECK: ret


define <4 x i32> @test6b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}
; CHECK-LABEL: test6b:
; CHECK-NOT: blendps
; CHECK: xorps
; CHECK-NEXT: xorps
; CHECK-NEXT: blendps
; CHECK: ret


; test1c - test6c: mask <0, 2, 5, 7> takes result lanes 0 and 1 from the first
; operand (its lanes 0 and 2) and result lanes 2 and 3 from the second operand
; (its lanes 1 and 3).

define <4 x i32> @test1c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}
; CHECK-LABEL: test1c:
; CHECK-NOT: shufps
; CHECK: andps
; CHECK-NEXT: shufps
; CHECK-NEXT: ret


define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test2c:
; CHECK-NOT: shufps
; CHECK: orps
; CHECK-NEXT: shufps
; CHECK-NEXT: ret


define <4 x i32> @test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}
; CHECK-LABEL: test3c:
; CHECK-NOT: shufps
; CHECK: xorps
; CHECK-NEXT: xorps
; CHECK-NEXT: shufps
; CHECK-NEXT: ret


; In test4c - test6c the shared operand %c is the first shuffle operand.
; As in test4b - test6b, the final ret is matched loosely.

define <4 x i32> @test4c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}
; CHECK-LABEL: test4c:
; CHECK-NOT: shufps
; CHECK: andps
; CHECK-NEXT: shufps
; CHECK: ret


define <4 x i32> @test5c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test5c:
; CHECK-NOT: shufps
; CHECK: orps
; CHECK-NEXT: shufps
; CHECK: ret


define <4 x i32> @test6c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}
; CHECK-LABEL: test6c:
; CHECK-NOT: shufps
; CHECK: xorps
; CHECK-NEXT: xorps
; CHECK-NEXT: shufps
; CHECK: ret
