; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64

; Verify that we correctly fold target-specific packed vector shifts by
; immediate count into a simple build_vector when the elements of the input
; vector to the packed shift are all constants or undef.

define <8 x i16> @test1() {
; X32-LABEL: test1:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 2, i16 4, i16 8>, i32 3)
  ret <8 x i16> %1
}

define <8 x i16> @test2() {
; X32-LABEL: test2:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
  ret <8 x i16> %1
}

define <8 x i16> @test3() {
; X32-LABEL: test3:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test4() {
; X32-LABEL: test4:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64]
; X32-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64]
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
  ret <4 x i32> %1
}

define <4 x i32> @test5() {
; X32-LABEL: test5:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test5:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <4 x i32> @test6() {
; X32-LABEL: test6:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test6:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <2 x i64> @test7() {
; X32-LABEL: test7:
; X32:       # BB#0:
; X32-NEXT:    movdqa {{.*#+}} xmm0 = [1,0,2,0]
; X32-NEXT:    psllq $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test7:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16]
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 1, i64 2>, i32 3)
  ret <2 x i64> %1
}
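
; Note: the v2i64 cases (test7 above; test8, test11 and test16 below) are not
; folded on X32: the constant is still materialized with movdqa and shifted
; with psllq/psrlq at run time. This is presumably because i64 is not a legal
; type on i686, so the constant build_vector is legalized into i32 pieces
; behind a bitcast and the fold no longer sees constant i64 elements.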

define <2 x i64> @test8() {
; X32-LABEL: test8:
; X32:       # BB#0:
; X32-NEXT:    movdqa {{.*#+}} xmm0 = [8,0,16,0]
; X32-NEXT:    psrlq $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test8:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [1,2]
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 8, i64 16>, i32 3)
  ret <2 x i64> %1
}

define <8 x i16> @test9() {
; X32-LABEL: test9:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X32-NEXT:    retl
;
; X64-LABEL: test9:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test10() {
; X32-LABEL: test10:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X32-NEXT:    retl
;
; X64-LABEL: test10:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <2 x i64> @test11() {
; X32-LABEL: test11:
; X32:       # BB#0:
; X32-NEXT:    movdqa {{.*#+}} xmm0 = <u,u,31,0>
; X32-NEXT:    psrlq $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test11:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,3>
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
  ret <2 x i64> %1
}

define <8 x i16> @test12() {
; X32-LABEL: test12:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X32-NEXT:    retl
;
; X64-LABEL: test12:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test13() {
; X32-LABEL: test13:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X32-NEXT:    retl
;
; X64-LABEL: test13:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <8 x i16> @test14() {
; X32-LABEL: test14:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X32-NEXT:    retl
;
; X64-LABEL: test14:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test15() {
; X32-LABEL: test15:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,64,u,256>
; X32-NEXT:    retl
;
; X64-LABEL: test15:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,64,u,256>
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <2 x i64> @test16() {
; X32-LABEL: test16:
; X32:       # BB#0:
; X32-NEXT:    movdqa {{.*#+}} xmm0 = <u,u,31,0>
; X32-NEXT:    psllq $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test16:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,248>
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
  ret <2 x i64> %1
}
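
; In test9 through test16 the fold also handles undef elements: lanes that are
; undef in the source vector stay undef in the folded constant (printed as 'u'
; in the CHECK patterns), while the defined lanes are shifted as usual.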

declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32)
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32)
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32)