; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=CHECK,AVX,AVX-FAST

; fold (shl 0, x) -> 0
define <4 x i32> @combine_vec_shl_zero(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_zero:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_zero:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> zeroinitializer, %x
  ret <4 x i32> %1
}

; fold (shl x, c >= size(x)) -> undef
define <4 x i32> @combine_vec_shl_outofrange0(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_shl_outofrange0:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 33, i32 33, i32 33, i32 33>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_shl_outofrange1(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_shl_outofrange1:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 33, i32 34, i32 35, i32 36>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_shl_outofrange2(<4 x i32> %a0) {
; CHECK-LABEL: combine_vec_shl_outofrange2:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %2 = shl <4 x i32> %1, <i32 33, i32 33, i32 33, i32 33>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_outofrange3(<4 x i32> %a0) {
; CHECK-LABEL: combine_vec_shl_outofrange3:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
  %1 = shl <4 x i32> %a0, <i32 33, i32 34, i32 35, i32 undef>
  ret <4 x i32> %1
}

; fold (shl x, 0) -> x
define <4 x i32> @combine_vec_shl_by_zero(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_shl_by_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
  %1 = shl <4 x i32> %x, zeroinitializer
  ret <4 x i32> %1
}

; if (shl x, c) is known to be zero, return 0
define <4 x i32> @combine_vec_shl_known_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_known_zero0:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_known_zero0:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
  %2 = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %2
}
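
; Illustrative note (editorial, not part of the autogenerated checks): in
; combine_vec_shl_known_zero0 above, the AND keeps only the top 16 bits
; (mask 0xFFFF0000) and the following shl by 16 shifts exactly those bits
; out, e.g. (0xABCD0000 << 16) == 0 for i32, so the result is known zero
; and folds to xorps/vxorps of %xmm0.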

define <4 x i32> @combine_vec_shl_known_zero1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_known_zero1:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65536,32768,16384,8192]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_known_zero1:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_known_zero1:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 4294901760, i32 8589803520, i32 17179607040, i32 34359214080>
  %2 = shl <4 x i32> %1, <i32 16, i32 15, i32 14, i32 13>
  ret <4 x i32> %2
}

; fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
define <4 x i32> @combine_vec_shl_trunc_and(<4 x i32> %x, <4 x i64> %y) {
; SSE2-LABEL: combine_vec_shl_trunc_and:
; SSE2: # %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
; SSE2-NEXT: pslld $23, %xmm1
; SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_trunc_and:
; SSE41: # %bb.0:
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; SSE41-NEXT: andps {{.*}}(%rip), %xmm1
; SSE41-NEXT: pslld $23, %xmm1
; SSE41-NEXT: paddd {{.*}}(%rip), %xmm1
; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
; SSE41-NEXT: pmulld %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-SLOW-LABEL: combine_vec_shl_trunc_and:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; AVX-SLOW-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
; AVX-SLOW-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: vzeroupper
; AVX-SLOW-NEXT: retq
;
; AVX-FAST-LABEL: combine_vec_shl_trunc_and:
; AVX-FAST: # %bb.0:
; AVX-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX-FAST-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-FAST-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX-FAST-NEXT: vzeroupper
; AVX-FAST-NEXT: retq
  %1 = and <4 x i64> %y, <i64 15, i64 255, i64 4095, i64 65535>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shl <4 x i32> %x, %2
  ret <4 x i32> %3
}

; fold (shl (shl x, c1), c2) -> (shl x, (add c1, c2))
define <4 x i32> @combine_vec_shl_shl0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl0:
; SSE: # %bb.0:
; SSE-NEXT: pslld $6, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shl0:
; AVX: # %bb.0:
; AVX-NEXT: vpslld $6, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %2 = shl <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}
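
; Illustrative note (editorial, not part of the autogenerated checks): two
; constant shifts merge by adding their amounts, e.g. (x << 2) << 4 == x << 6,
; which is why combine_vec_shl_shl0 lowers to a single pslld $6 / vpslld $6.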

define <4 x i32> @combine_vec_shl_shl1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_shl1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [16,64,256,1024]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_shl1:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shl1:
; AVX: # %bb.0:
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 0, i32 1, i32 2, i32 3>
  %2 = shl <4 x i32> %1, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %2
}

; fold (shl (shl x, c1), c2) -> 0 if c1+c2 >= size(x)
define <4 x i32> @combine_vec_shl_shlr_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shlr_zero0:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shlr_zero0:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %2 = shl <4 x i32> %1, <i32 20, i32 20, i32 20, i32 20>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_shl_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl_zero1:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shl_zero1:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 17, i32 18, i32 19, i32 20>
  %2 = shl <4 x i32> %1, <i32 25, i32 26, i32 27, i32 28>
  ret <4 x i32> %2
}

; fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
define <8 x i32> @combine_vec_shl_ext_shl0(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_ext_shl0:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pslld $20, %xmm0
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pslld $20, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_ext_shl0:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE41-NEXT: pslld $20, %xmm1
; SSE41-NEXT: pslld $20, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ext_shl0:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: vpslld $20, %ymm0, %ymm0
; AVX-NEXT: retq
  %1 = shl <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %2 = sext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <8 x i32> %3
}

define <8 x i32> @combine_vec_shl_ext_shl1(<8 x i16> %x) {
; SSE-LABEL: combine_vec_shl_ext_shl1:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ext_shl1:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  %2 = sext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 31, i32 31, i32 30, i32 30, i32 29, i32 29, i32 28, i32 28>
  ret <8 x i32> %3
}
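
; Illustrative note (editorial, not part of the autogenerated checks): in
; combine_vec_shl_ext_shl1 the combined per-lane shift amounts are
; 1+31, 2+31, 3+30, ..., 8+28, all of which are at least 32, so every bit
; of the 32-bit result is shifted out and the whole vector folds to zero.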

define <8 x i32> @combine_vec_shl_ext_shl2(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_ext_shl2:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [131072,524288,2097152,8388608]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm3, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [33554432,134217728,536870912,2147483648]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm4, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_ext_shl2:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxwd %xmm0, %xmm2
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ext_shl2:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %1 = shl <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  %2 = sext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <8 x i32> %3
}

; fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
define <8 x i32> @combine_vec_shl_zext_lshr0(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_zext_lshr0:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_zext_lshr0:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_zext_lshr0:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
  %1 = lshr <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %2 = zext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i32> %3
}
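
; Illustrative note (editorial, not part of the autogenerated checks):
; shifting right and then left by the same amount only clears the low bits,
; e.g. (x >> 4) << 4 == x & 0xFFF0 for i16, so combine_vec_shl_zext_lshr0
; becomes a mask (pand/vpand) followed by a plain zero-extend.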

define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_zext_lshr1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pmulhuw {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_zext_lshr1:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pmulhuw {{.*}}(%rip), %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm1
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_zext_lshr1:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
  %1 = lshr <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  %2 = zext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  ret <8 x i32> %3
}

; fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
define <4 x i32> @combine_vec_shl_ge_ashr_extact0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ge_ashr_extact0:
; SSE: # %bb.0:
; SSE-NEXT: pslld $2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ge_ashr_extact0:
; AVX: # %bb.0:
; AVX-NEXT: vpslld $2, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr exact <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_ge_ashr_extact1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_ge_ashr_extact1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $3, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $5, %xmm2
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $8, %xmm1
; SSE2-NEXT: psrad $4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[3,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32,64,128,256]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_ge_ashr_extact1:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrad $8, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrad $4, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrad $5, %xmm1
; SSE41-NEXT: psrad $3, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ge_ashr_extact1:
; AVX: # %bb.0:
; AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr exact <4 x i32> %x, <i32 3, i32 4, i32 5, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}
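
; Illustrative note (editorial, not part of the autogenerated checks): because
; the ashr is marked "exact", no set bits are discarded, so the two shifts
; simply cancel into their difference, e.g. (x ashr exact 3) << 5 == x << 2,
; matching the single pslld $2 in combine_vec_shl_ge_ashr_extact0.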

; fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
define <4 x i32> @combine_vec_shl_lt_ashr_extact0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_lt_ashr_extact0:
; SSE: # %bb.0:
; SSE-NEXT: psrad $2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_lt_ashr_extact0:
; AVX: # %bb.0:
; AVX-NEXT: vpsrad $2, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr exact <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_lt_ashr_extact1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_lt_ashr_extact1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $5, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $7, %xmm2
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $8, %xmm1
; SSE2-NEXT: psrad $6, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[3,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [8,16,32,256]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_lt_ashr_extact1:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrad $8, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrad $6, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrad $7, %xmm1
; SSE41-NEXT: psrad $5, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_lt_ashr_extact1:
; AVX: # %bb.0:
; AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr exact <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 3, i32 4, i32 5, i32 8>
  ret <4 x i32> %2
}

; fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) if C2 > C1
define <4 x i32> @combine_vec_shl_gt_lshr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_gt_lshr0:
; SSE: # %bb.0:
; SSE-NEXT: pslld $2, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_gt_lshr0:
; AVX: # %bb.0:
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967264,4294967264,4294967264,4294967264]
; AVX-NEXT: vpslld $2, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}
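
; Illustrative note (editorial, not part of the autogenerated checks): without
; the "exact" flag the difference of the shifts still applies, but the low
; bits must be masked off, e.g. (x >> 3) << 5 == (x << 2) & 0xFFFFFFE0, which
; is the pslld $2 plus pand (constant 4294967264) seen above.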

define <4 x i32> @combine_vec_shl_gt_lshr1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_gt_lshr1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $3, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrld $5, %xmm2
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $8, %xmm1
; SSE2-NEXT: psrld $4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[3,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32,64,128,256]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_gt_lshr1:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $8, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrld $4, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $5, %xmm1
; SSE41-NEXT: psrld $3, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_gt_lshr1:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 3, i32 4, i32 5, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 >= C2
define <4 x i32> @combine_vec_shl_le_lshr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_le_lshr0:
; SSE: # %bb.0:
; SSE-NEXT: psrld $2, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_le_lshr0:
; AVX: # %bb.0:
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1073741816,1073741816,1073741816,1073741816]
; AVX-NEXT: vpsrld $2, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}
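
; Illustrative note (editorial, not part of the autogenerated checks): when
; the right shift is larger, the residual shift goes right instead, e.g.
; (x >> 5) << 3 == (x >> 2) & 0x3FFFFFF8, i.e. psrld $2 plus a pand with
; 1073741816 as in combine_vec_shl_le_lshr0.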

define <4 x i32> @combine_vec_shl_le_lshr1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_le_lshr1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $5, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrld $7, %xmm2
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $8, %xmm1
; SSE2-NEXT: psrld $6, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[3,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [8,16,32,256]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_le_lshr1:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $8, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrld $6, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $7, %xmm1
; SSE41-NEXT: psrld $5, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_le_lshr1:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 3, i32 4, i32 5, i32 8>
  ret <4 x i32> %2
}

; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
define <4 x i32> @combine_vec_shl_ashr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ashr0:
; SSE: # %bb.0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ashr0:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [4294967264,4294967264,4294967264,4294967264]
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_ashr1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ashr1:
; SSE: # %bb.0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ashr1:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
define <4 x i32> @combine_vec_shl_add0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_add0:
; SSE: # %bb.0:
; SSE-NEXT: pslld $2, %xmm0
; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_add0:
; AVX: # %bb.0:
; AVX-NEXT: vpslld $2, %xmm0, %xmm0
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = add <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_add1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_add1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2,4,8,16]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_add1:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_add1:
; AVX: # %bb.0:
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = add <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}

; fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
define <4 x i32> @combine_vec_shl_or0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_or0:
; SSE: # %bb.0:
; SSE-NEXT: pslld $2, %xmm0
; SSE-NEXT: por {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_or0:
; AVX: # %bb.0:
; AVX-NEXT: vpslld $2, %xmm0, %xmm0
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = or <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}
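
; Illustrative note (editorial, not part of the autogenerated checks): shifting
; an add or an or with a constant just pre-shifts that constant, e.g.
; (x + 5) << 2 == (x << 2) + 20 and (x | 5) << 2 == (x << 2) | 20, hence the
; splat constant 20 in the paddd/por sequences above.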

define <4 x i32> @combine_vec_shl_or1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_or1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2,4,8,16]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: por {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_or1:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: por {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_or1:
; AVX: # %bb.0:
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = or <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}

; fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
define <4 x i32> @combine_vec_shl_mul0(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_mul0:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [20,20,20,20]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_mul0:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_mul0:
; AVX: # %bb.0:
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = mul <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_mul1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_mul1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [10,24,56,128]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_mul1:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_mul1:
; AVX: # %bb.0:
; AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = mul <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}
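
; Illustrative note (editorial, not part of the autogenerated checks): a shift
; of a multiply folds into the multiply, e.g. (x * 5) << 2 == x * 20, and per
; lane (x * <5,6,7,8>) << <1,2,3,4> == x * <10,24,56,128>, the constant vector
; visible in the combine_vec_shl_mul1 checks.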

; fold (add (shl x, c1), c2) -> (or (shl x, c1), c2)
define <4 x i32> @combine_vec_add_shl_nonsplat(<4 x i32> %a0) {
; SSE2-LABEL: combine_vec_add_shl_nonsplat:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4,8,16,32]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: por {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_add_shl_nonsplat:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: por {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_add_shl_nonsplat:
; AVX: # %bb.0:
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %a0, <i32 2, i32 3, i32 4, i32 5>
  %2 = add <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) {
; SSE2-LABEL: combine_vec_add_shl_and_nonsplat:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4,8,16,32]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: por {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_add_shl_and_nonsplat:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: por {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_add_shl_and_nonsplat:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
  %2 = shl <4 x i32> %1, <i32 2, i32 3, i32 4, i32 5>
  %3 = add <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %3
}
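
; Illustrative note (editorial, not part of the autogenerated checks): after
; the shl, the low bits of every lane are known zero, so adding a constant
; that fits entirely in those zero bits can never carry and the add is
; emitted as a por instead, e.g. (x << 2) + 3 == (x << 2) | 3.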

define <4 x i32> @combine_vec_add_shuffle_shl(<4 x i32> %a0) {
; SSE2-LABEL: combine_vec_add_shuffle_shl:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pslld $3, %xmm1
; SSE2-NEXT: pslld $2, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,3,0]
; SSE2-NEXT: por {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_add_shuffle_shl:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pslld $3, %xmm1
; SSE41-NEXT: pslld $2, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
; SSE41-NEXT: por {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_add_shuffle_shl:
; AVX: # %bb.0:
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %a0, <i32 2, i32 3, i32 0, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
  %3 = add <4 x i32> %2, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %3
}