; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=CHECK,AVX,AVX-FAST

; fold (shl 0, x) -> 0
define <4 x i32> @combine_vec_shl_zero(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> zeroinitializer, %x
  ret <4 x i32> %1
}

; fold (shl x, c >= size(x)) -> undef
define <4 x i32> @combine_vec_shl_outofrange0(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_shl_outofrange0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = shl <4 x i32> %x, <i32 33, i32 33, i32 33, i32 33>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_shl_outofrange1(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_shl_outofrange1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = shl <4 x i32> %x, <i32 33, i32 34, i32 35, i32 36>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_shl_outofrange2(<4 x i32> %a0) {
; CHECK-LABEL: combine_vec_shl_outofrange2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %2 = shl <4 x i32> %1, <i32 33, i32 33, i32 33, i32 33>
  ret <4 x i32> %2
}

; fold (shl x, 0) -> x
define <4 x i32> @combine_vec_shl_by_zero(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_shl_by_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = shl <4 x i32> %x, zeroinitializer
  ret <4 x i32> %1
}

; if (shl x, c) is known to be zero, return 0
define <4 x i32> @combine_vec_shl_known_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_known_zero0:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_known_zero0:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = and <4 x i32> %x, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
  %2 = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %2
}
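
; Illustrative note (not a checked assertion): 4294901760 is 0xffff0000, so
; the 'and' leaves only bits 16-31 set and the shift left by 16 pushes every
; remaining bit past bit 31, which is why both codegen paths reduce to a
; zeroing idiom.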

define <4 x i32> @combine_vec_shl_known_zero1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_known_zero1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65536,32768,16384,8192]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_known_zero1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_known_zero1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = and <4 x i32> %x, <i32 4294901760, i32 8589803520, i32 17179607040, i32 34359214080>
  %2 = shl <4 x i32> %1, <i32 16, i32 15, i32 14, i32 13>
  ret <4 x i32> %2
}

; fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
define <4 x i32> @combine_vec_shl_trunc_and(<4 x i32> %x, <4 x i64> %y) {
; SSE2-LABEL: combine_vec_shl_trunc_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pslld $23, %xmm1
; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_trunc_and:
; SSE41:       # %bb.0:
; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; SSE41-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE41-NEXT:    pslld $23, %xmm1
; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm1
; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE41-NEXT:    pmulld %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-SLOW-LABEL: combine_vec_shl_trunc_and:
; AVX-SLOW:       # %bb.0:
; AVX-SLOW-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX-SLOW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-SLOW-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT:    vzeroupper
; AVX-SLOW-NEXT:    retq
;
; AVX-FAST-LABEL: combine_vec_shl_trunc_and:
; AVX-FAST:       # %bb.0:
; AVX-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX-FAST-NEXT:    vpermd %ymm1, %ymm2, %ymm1
; AVX-FAST-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-FAST-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; AVX-FAST-NEXT:    vzeroupper
; AVX-FAST-NEXT:    retq
  %1 = and <4 x i64> %y, <i64 15, i64 255, i64 4095, i64 65535>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shl <4 x i32> %x, %2
  ret <4 x i32> %3
}

; fold (shl (shl x, c1), c2) -> (shl x, (add c1, c2))
define <4 x i32> @combine_vec_shl_shl0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $6, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_shl0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $6, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %2 = shl <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}
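
; Illustrative note (not a checked assertion): constant shift amounts compose
; additively, e.g. (x << 2) << 4 == x << 6, so the two shifts collapse into
; the single pslld $6 / vpslld $6 above.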

define <4 x i32> @combine_vec_shl_shl1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_shl1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [16,64,256,1024]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_shl1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_shl1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> %x, <i32 0, i32 1, i32 2, i32 3>
  %2 = shl <4 x i32> %1, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %2
}

; fold (shl (shl x, c1), c2) -> 0 if c1 + c2 >= size(x)
define <4 x i32> @combine_vec_shl_shl_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl_zero0:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_shl_zero0:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %2 = shl <4 x i32> %1, <i32 20, i32 20, i32 20, i32 20>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_shl_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl_zero1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_shl_zero1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> %x, <i32 17, i32 18, i32 19, i32 20>
  %2 = shl <4 x i32> %1, <i32 25, i32 26, i32 27, i32 28>
  ret <4 x i32> %2
}

; fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
define <8 x i32> @combine_vec_shl_ext_shl0(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_ext_shl0:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pslld $20, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    pslld $20, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_ext_shl0:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE41-NEXT:    pslld $20, %xmm1
; SSE41-NEXT:    pslld $20, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_ext_shl0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpslld $20, %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = shl <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %2 = sext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <8 x i32> %3
}
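
; Illustrative note (not a checked assertion): here c1 + c2 = 4 + 16 = 20, and
; since 20 >= 16 the outer shift discards every copy of the i16 sign bit, so
; a cheap zero-extend followed by one shift by 20 produces the same value as
; the sign-extend would.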

define <8 x i32> @combine_vec_shl_ext_shl1(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_ext_shl1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    pslld $30, %xmm0
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pslld $29, %xmm2
; SSE2-NEXT:    pslld $28, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_ext_shl1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmullw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovsxwd %xmm1, %xmm1
; SSE41-NEXT:    pmovsxwd %xmm0, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pslld $30, %xmm2
; SSE41-NEXT:    pslld $31, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pslld $28, %xmm2
; SSE41-NEXT:    pslld $29, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_ext_shl1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = shl <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  %2 = sext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 31, i32 31, i32 30, i32 30, i32 29, i32 29, i32 28, i32 28>
  ret <8 x i32> %3
}

; fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
define <8 x i32> @combine_vec_shl_zext_lshr0(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_zext_lshr0:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_zext_lshr0:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_zext_lshr0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    retq
  %1 = lshr <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %2 = zext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i32> %3
}
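
; Illustrative note (not a checked assertion): shifting right by 4 and then
; left by 4 in the wider type just clears the low four bits, i.e.
; zext(x >> 4) << 4 == zext(x & 0xfff0), hence the single pand before the
; zero-extend above.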

define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_zext_lshr1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmulhuw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,16]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [32,64,128,256]
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_zext_lshr1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulhuw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_zext_lshr1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = lshr <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  %2 = zext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  ret <8 x i32> %3
}

; fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
define <4 x i32> @combine_vec_shl_ge_ashr_extact0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ge_ashr_extact0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_ge_ashr_extact0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr exact <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_ge_ashr_extact1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_ge_ashr_extact1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $8, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $5, %xmm2
; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $4, %xmm1
; SSE2-NEXT:    psrad $3, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32,64,128,256]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_ge_ashr_extact1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrad $8, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrad $4, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrad $5, %xmm1
; SSE41-NEXT:    psrad $3, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_ge_ashr_extact1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr exact <4 x i32> %x, <i32 3, i32 4, i32 5, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}
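
; Illustrative note (not a checked assertion): 'ashr exact' promises that no
; set bits were shifted out, so (x >> 3) << 5 == x << 2 in every lane of
; combine_vec_shl_ge_ashr_extact0, matching the single pslld $2 above.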

; fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 > C2
define <4 x i32> @combine_vec_shl_lt_ashr_extact0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_lt_ashr_extact0:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_lt_ashr_extact0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr exact <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_lt_ashr_extact1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_lt_ashr_extact1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $8, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $7, %xmm2
; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $6, %xmm1
; SSE2-NEXT:    psrad $5, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [8,16,32,256]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_lt_ashr_extact1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrad $8, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrad $6, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrad $7, %xmm1
; SSE41-NEXT:    psrad $5, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_lt_ashr_extact1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr exact <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 3, i32 4, i32 5, i32 8>
  ret <4 x i32> %2
}

; fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) if C2 > C1
define <4 x i32> @combine_vec_shl_gt_lshr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_gt_lshr0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $2, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_gt_lshr0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294967264,4294967264,4294967264,4294967264]
; AVX-NEXT:    vpslld $2, %xmm0, %xmm0
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}
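
; Illustrative note (not a checked assertion): with C1 = 3 and C2 = 5,
; (x >>u 3) << 5 == (x << 2) & 0xffffffe0, and 0xffffffe0 is exactly the
; 4294967264 broadcast in the AVX output above.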

define <4 x i32> @combine_vec_shl_gt_lshr1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_gt_lshr1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $8, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrld $5, %xmm2
; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $4, %xmm1
; SSE2-NEXT:    psrld $3, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32,64,128,256]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_gt_lshr1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $8, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrld $4, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $5, %xmm1
; SSE41-NEXT:    psrld $3, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_gt_lshr1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 3, i32 4, i32 5, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 >= C2
define <4 x i32> @combine_vec_shl_le_lshr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_le_lshr0:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $2, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_le_lshr0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1073741816,1073741816,1073741816,1073741816]
; AVX-NEXT:    vpsrld $2, %xmm0, %xmm0
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}
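
; Illustrative note (not a checked assertion): with C1 = 5 and C2 = 3,
; (x >>u 5) << 3 == (x >>u 2) & 0x3ffffff8, and 0x3ffffff8 is exactly the
; 1073741816 broadcast in the AVX output above.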

define <4 x i32> @combine_vec_shl_le_lshr1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_le_lshr1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $8, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrld $7, %xmm2
; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $6, %xmm1
; SSE2-NEXT:    psrld $5, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [8,16,32,256]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_le_lshr1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $8, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrld $6, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $7, %xmm1
; SSE41-NEXT:    psrld $5, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_le_lshr1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 3, i32 4, i32 5, i32 8>
  ret <4 x i32> %2
}

; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
define <4 x i32> @combine_vec_shl_ashr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ashr0:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_ashr0:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [4294967264,4294967264,4294967264,4294967264]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_ashr1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ashr1:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_ashr1:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
define <4 x i32> @combine_vec_shl_add0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_add0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $2, %xmm0
; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_add0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $2, %xmm0, %xmm0
; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = add <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_add1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_add1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,16]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_add1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_add1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = add <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}
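
; Illustrative note (not a checked assertion): shl distributes over add, e.g.
; (x + 5) << 2 == (x << 2) + 20, which is the [20,20,20,20] broadcast in
; combine_vec_shl_add0 above.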

; fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
define <4 x i32> @combine_vec_shl_or0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_or0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $2, %xmm0
; SSE-NEXT:    por {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_or0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $2, %xmm0, %xmm0
; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = or <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_or1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_or1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,16]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_or1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_or1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = or <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}

; fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
define <4 x i32> @combine_vec_shl_mul0(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_mul0:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [20,20,20,20]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_mul0:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_mul0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
; AVX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = mul <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}
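
; Illustrative note (not a checked assertion): a shift of a constant multiply
; is itself a multiply, e.g. (x * 5) << 2 == x * 20, matching the
; [20,20,20,20] multiplier in combine_vec_shl_mul0 above.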

define <4 x i32> @combine_vec_shl_mul1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_mul1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [10,24,56,128]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_mul1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_mul1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = mul <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}