; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE2
; RUN: llc < %s -mtriple=i686-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSSE3
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK-AVX

; Each function below is a single shufflevector whose mask selects a run of
; consecutive lanes (optionally with undef lanes) from one or both inputs.
; The expected assembly is checked for three feature levels: SSE2, SSSE3 and
; AVX. Lane descriptions in the per-test comments are read directly off the
; shuffle masks (indices >= the vector length select from the second operand).

; Single-input mask <1,2,3,0>: result lanes are A[1], A[2], A[3], A[0].
; %B is unused.
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-SSE-LABEL: test1:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-SSE-NEXT:    retl
;
; CHECK-AVX-LABEL: test1:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >
  ret <4 x i32> %C
}

; Two-input mask <1,2,3,4>: result lanes are A[1], A[2], A[3], B[0].
define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-SSE2-LABEL: test2:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test2:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test2:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 3, i32 4 >
  ret <4 x i32> %C
}

; Same mask as test2 but with lane 2 undef: A[1], A[2], undef, B[0].
define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-SSE2-LABEL: test3:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test3:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test3:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >
  ret <4 x i32> %C
}

; Mask <6,7,undef,1>: result lanes are B[2], B[3], undef, A[1].
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-SSE2-LABEL: test4:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test4:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test4:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
  ret <4 x i32> %C
}

; Same mask as test4, applied to <4 x float> operands.
define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
; CHECK-SSE-LABEL: test5:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; CHECK-SSE-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE-NEXT:    retl
;
; CHECK-AVX-LABEL: test5:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm1[1],xmm0[0]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
  ret <4 x float> %C
}

; <8 x i16> mask <3,4,undef,6,7,8,9,10>:
; A[3], A[4], undef, A[6], A[7], B[0], B[1], B[2].
define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-SSE2-LABEL: test6:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; CHECK-SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test6:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test6:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >
  ret <8 x i16> %C
}

; <8 x i16> mask <undef,6,undef,8,9,10,11,12>:
; undef, A[6], undef, B[0], B[1], B[2], B[3], B[4].
define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-SSE2-LABEL: test7:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test7:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test7:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >
  ret <8 x i16> %C
}

; <16 x i8> mask <5,6,7,undef,9,...,15,16,...,20>:
; A[5], A[6], A[7], undef, A[9]..A[15], B[0]..B[4].
define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
; CHECK-SSE2-LABEL: test8:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; CHECK-SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test8:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test8:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >
  ret <16 x i8> %C
}

; Check that we don't do unary (circular on single operand) palignr incorrectly.
; (It is possible, but before this testcase was committed, it was being done
; incorrectly.  In particular, one of the operands of the palignr node
; was an UNDEF.)
; Note the operand order: %B is the first shuffle input, so the mask
; <undef,2,3,4,5,6,7,0> selects undef, B[2]..B[7], B[0]; %A is never read.
define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-SSE2-LABEL: test9:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; CHECK-SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test9:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test9:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >
  ret <8 x i16> %C
}