• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VLBW
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512vbmi,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VLVBMI
10
; Splat test: every mask index is 0, so byte 0 of %a is replicated into all
; 16 result lanes and %b is never selected.
; Per the assertions below, the SSE2 run expects a punpcklbw/pshuflw/pshufd
; sequence, the SSSE3, SSE4.1 and AVX1 runs expect (v)pshufb with a zeroed
; index register, and the AVX2 / AVX-512VL runs expect a single vpbroadcastb.
11define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
12; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
13; SSE2:       # %bb.0:
14; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
15; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
16; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
17; SSE2-NEXT:    retq
18;
19; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
20; SSSE3:       # %bb.0:
21; SSSE3-NEXT:    pxor %xmm1, %xmm1
22; SSSE3-NEXT:    pshufb %xmm1, %xmm0
23; SSSE3-NEXT:    retq
24;
25; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
26; SSE41:       # %bb.0:
27; SSE41-NEXT:    pxor %xmm1, %xmm1
28; SSE41-NEXT:    pshufb %xmm1, %xmm0
29; SSE41-NEXT:    retq
30;
31; AVX1-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
32; AVX1:       # %bb.0:
33; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
34; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
35; AVX1-NEXT:    retq
36;
37; AVX2OR512VL-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
38; AVX2OR512VL:       # %bb.0:
39; AVX2OR512VL-NEXT:    vpbroadcastb %xmm0, %xmm0
40; AVX2OR512VL-NEXT:    retq
41  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
42  ret <16 x i8> %shuffle
43}
44
; Two-value splat: the low eight result bytes are copies of %a[0] and the high
; eight are copies of %a[1]; %b is never selected.
; The SSE2 run expects an unpack followed by word and dword shuffles, while
; every run with SSSE3 or later expects a single (v)pshufb.
45define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
46; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
47; SSE2:       # %bb.0:
48; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
49; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
50; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
51; SSE2-NEXT:    retq
52;
53; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
54; SSSE3:       # %bb.0:
55; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
56; SSSE3-NEXT:    retq
57;
58; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
59; SSE41:       # %bb.0:
60; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
61; SSE41-NEXT:    retq
62;
63; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
64; AVX:       # %bb.0:
65; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
66; AVX-NEXT:    retq
67  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
68  ret <16 x i8> %shuffle
69}
70
; Two-value splat taken from both 64-bit halves of %a: the low eight result
; bytes are copies of %a[0] and the high eight are copies of %a[8]; %b is never
; selected.
; The SSE2 run first gathers bytes 0 and 8 next to each other (pshufd +
; pshuflw) before widening them with punpcklbw/pshuflw/pshufd; runs with SSSE3
; or later expect a single (v)pshufb.
71define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
72; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
73; SSE2:       # %bb.0:
74; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
75; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
76; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
77; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
78; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
79; SSE2-NEXT:    retq
80;
81; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
82; SSSE3:       # %bb.0:
83; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
84; SSSE3-NEXT:    retq
85;
86; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
87; SSE41:       # %bb.0:
88; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
89; SSE41-NEXT:    retq
90;
91; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
92; AVX:       # %bb.0:
93; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
94; AVX-NEXT:    retq
95  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
96  ret <16 x i8> %shuffle
97}
98
; Replicates each of bytes 0..3 of %a four times in order; %b is never
; selected.
; The SSE, AVX1 and AVX2-SLOW expectations use two self-unpacks (bytes, then
; words). The AVX2-FAST and AVX-512VL expectations, whose RUN lines enable
; +fast-variable-shuffle, use a single vpshufb instead.
99define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
100; SSE-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
101; SSE:       # %bb.0:
102; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
103; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
104; SSE-NEXT:    retq
105;
106; AVX1-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
107; AVX1:       # %bb.0:
108; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
109; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
110; AVX1-NEXT:    retq
111;
112; AVX2-SLOW-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
113; AVX2-SLOW:       # %bb.0:
114; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
115; AVX2-SLOW-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
116; AVX2-SLOW-NEXT:    retq
117;
118; AVX2-FAST-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
119; AVX2-FAST:       # %bb.0:
120; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3]
121; AVX2-FAST-NEXT:    retq
122;
123; AVX512VL-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
124; AVX512VL:       # %bb.0:
125; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3]
126; AVX512VL-NEXT:    retq
127  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
128  ret <16 x i8> %shuffle
129}
130
; Replicates each of bytes 4..7 of %a four times in order; %b is never
; selected.
; The SSE, AVX1 and AVX2-SLOW expectations use a low byte self-unpack followed
; by a high word self-unpack. The AVX2-FAST and AVX-512VL expectations, whose
; RUN lines enable +fast-variable-shuffle, use a single vpshufb instead.
131define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
132; SSE-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
133; SSE:       # %bb.0:
134; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
135; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
136; SSE-NEXT:    retq
137;
138; AVX1-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
139; AVX1:       # %bb.0:
140; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
141; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
142; AVX1-NEXT:    retq
143;
144; AVX2-SLOW-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
145; AVX2-SLOW:       # %bb.0:
146; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
147; AVX2-SLOW-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
148; AVX2-SLOW-NEXT:    retq
149;
150; AVX2-FAST-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
151; AVX2-FAST:       # %bb.0:
152; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7]
153; AVX2-FAST-NEXT:    retq
154;
155; AVX512VL-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
156; AVX512VL:       # %bb.0:
157; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7]
158; AVX512VL-NEXT:    retq
159  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
160  ret <16 x i8> %shuffle
161}
162
; Replicates the first byte of each 32-bit group of %a (bytes 0, 4, 8, 12)
; across that group; %b is never selected.
; The SSE2 run expects a six-instruction sequence of word/dword shuffles
; around a punpcklbw; runs with SSSE3 or later expect a single (v)pshufb.
163define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
164; SSE2-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
165; SSE2:       # %bb.0:
166; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
167; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
168; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
169; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
170; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
171; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
172; SSE2-NEXT:    retq
173;
174; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
175; SSSE3:       # %bb.0:
176; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
177; SSSE3-NEXT:    retq
178;
179; SSE41-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
180; SSE41:       # %bb.0:
181; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
182; SSE41-NEXT:    retq
183;
184; AVX-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
185; AVX:       # %bb.0:
186; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
187; AVX-NEXT:    retq
188  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
189  ret <16 x i8> %shuffle
190}
191
; Duplicates each of bytes 0..7 of %a into adjacent result lanes; %b is never
; selected.
; Every run expects exactly one (v)punpcklbw of the input with itself.
192define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
193; SSE-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
194; SSE:       # %bb.0:
195; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
196; SSE-NEXT:    retq
197;
198; AVX-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
199; AVX:       # %bb.0:
200; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
201; AVX-NEXT:    retq
202  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
203  ret <16 x i8> %shuffle
204}
205
; Repeats the byte pair (%a[0], %a[1]) eight times, i.e. the low 16-bit word
; of %a is broadcast to every word of the result; %b is never selected.
; The SSE and AVX1 runs expect (v)pshuflw + (v)pshufd; the AVX2 / AVX-512VL
; runs expect a single vpbroadcastw.
206define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
207; SSE-LABEL: shuffle_v16i8_0101010101010101:
208; SSE:       # %bb.0:
209; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
210; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
211; SSE-NEXT:    retq
212;
213; AVX1-LABEL: shuffle_v16i8_0101010101010101:
214; AVX1:       # %bb.0:
215; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
216; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
217; AVX1-NEXT:    retq
218;
219; AVX2OR512VL-LABEL: shuffle_v16i8_0101010101010101:
220; AVX2OR512VL:       # %bb.0:
221; AVX2OR512VL-NEXT:    vpbroadcastw %xmm0, %xmm0
222; AVX2OR512VL-NEXT:    retq
223  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
224  ret <16 x i8> %shuffle
225}
226
; Interleaves the low eight bytes of %a with the low eight bytes of %b
; (mask indices 16..23 select %b[0..7]): a0,b0,a1,b1,...,a7,b7.
; Every run expects exactly one two-operand (v)punpcklbw.
227define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
228; SSE-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
229; SSE:       # %bb.0:
230; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
231; SSE-NEXT:    retq
232;
233; AVX-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
234; AVX:       # %bb.0:
235; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
236; AVX-NEXT:    retq
237  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
238  ret <16 x i8> %shuffle
239}
240
; Interleaves the high eight bytes of %a with the high eight bytes of %b
; (mask indices 24..31 select %b[8..15]): a8,b8,a9,b9,...,a15,b15.
; Every run expects exactly one two-operand (v)punpckhbw.
241define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(<16 x i8> %a, <16 x i8> %b) {
242; SSE-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
243; SSE:       # %bb.0:
244; SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
245; SSE-NEXT:    retq
246;
247; AVX-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
248; AVX:       # %bb.0:
249; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
250; AVX-NEXT:    retq
251  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
252  ret <16 x i8> %shuffle
253}
254
; Interleaves a splat of %b[0] (mask index 16) with the low eight bytes of %a:
; even result lanes all hold %b[0], odd lanes hold %a[0..7] in order.
; The SSE2 run builds the two halves separately and merges them with pand/por.
; The SSSE3, SSE4.1 and AVX1 runs splat the byte with unpack + (v)pshuflw and
; then interleave with (v)punpcklbw. The AVX2 / AVX-512VL runs splat with
; vpbroadcastb before the same interleave.
255define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
256; SSE2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
257; SSE2:       # %bb.0:
258; SSE2-NEXT:    pxor %xmm2, %xmm2
259; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
260; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
261; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
262; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
263; SSE2-NEXT:    por %xmm2, %xmm0
264; SSE2-NEXT:    retq
265;
266; SSSE3-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
267; SSSE3:       # %bb.0:
268; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
269; SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
270; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
271; SSSE3-NEXT:    movdqa %xmm1, %xmm0
272; SSSE3-NEXT:    retq
273;
274; SSE41-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
275; SSE41:       # %bb.0:
276; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
277; SSE41-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
278; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
279; SSE41-NEXT:    movdqa %xmm1, %xmm0
280; SSE41-NEXT:    retq
281;
282; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
283; AVX1:       # %bb.0:
284; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
285; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
286; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
287; AVX1-NEXT:    retq
288;
289; AVX2OR512VL-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
290; AVX2OR512VL:       # %bb.0:
291; AVX2OR512VL-NEXT:    vpbroadcastb %xmm1, %xmm1
292; AVX2OR512VL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
293; AVX2OR512VL-NEXT:    retq
294  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
295  ret <16 x i8> %shuffle
296}
297
; Reverses the byte order inside each 32-bit group of %a (3,2,1,0 / 7,6,5,4 /
; 11,10,9,8 / 15,14,13,12); %b is never selected.
; The SSE2 run widens both halves to words against a zero register, reverses
; words with pshuflw/pshufhw, and re-packs with packuswb. Runs with SSSE3 or
; later expect a single (v)pshufb.
298define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
299; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
300; SSE2:       # %bb.0:
301; SSE2-NEXT:    pxor %xmm1, %xmm1
302; SSE2-NEXT:    movdqa %xmm0, %xmm2
303; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
304; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
305; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
306; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
307; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
308; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
309; SSE2-NEXT:    packuswb %xmm2, %xmm0
310; SSE2-NEXT:    retq
311;
312; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
313; SSSE3:       # %bb.0:
314; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
315; SSSE3-NEXT:    retq
316;
317; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
318; SSE41:       # %bb.0:
319; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
320; SSE41-NEXT:    retq
321;
322; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
323; AVX:       # %bb.0:
324; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
325; AVX-NEXT:    retq
326  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
327  ret <16 x i8> %shuffle
328}
329
; Two-input variant of the per-dword byte reversal: the low eight result bytes
; are %a[0..7] with each 32-bit group byte-reversed, the high eight are
; %b[0..7] (mask indices 16..23) with each 32-bit group byte-reversed.
; The SSE2 run widens each input's low half to words, reverses, and re-packs
; with packuswb. Runs with SSSE3 or later first interleave the two low halves
; with (v)punpcklbw and then apply one (v)pshufb that both de-interleaves and
; reverses.
330define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
331; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
332; SSE2:       # %bb.0:
333; SSE2-NEXT:    pxor %xmm2, %xmm2
334; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
335; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
336; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
337; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
338; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
339; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
340; SSE2-NEXT:    packuswb %xmm1, %xmm0
341; SSE2-NEXT:    retq
342;
343; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
344; SSSE3:       # %bb.0:
345; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
346; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
347; SSSE3-NEXT:    retq
348;
349; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
350; SSE41:       # %bb.0:
351; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
352; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
353; SSE41-NEXT:    retq
354;
355; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
356; AVX:       # %bb.0:
357; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
358; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
359; AVX-NEXT:    retq
360  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
361  ret <16 x i8> %shuffle
362}
363
; Byte-reversed dwords taken alternately from the two inputs. Result dword 0 is
; %a[3..0], dword 1 is %b[15..12] (indices 31..28), dword 2 is %a[11..8], and
; dword 3 is %b[7..4] (indices 23..20).
; The SSE2 run first gathers the four needed dwords with pshufd/punpckldq and
; then reverses bytes via word widening, pshuflw/pshufhw, movsd merges and a
; final packuswb. Runs with SSSE3 or later reverse and select within each
; input using one (v)pshufb per input and merge with (v)punpckldq.
364define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
365; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
366; SSE2:       # %bb.0:
367; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
368; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
369; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
370; SSE2-NEXT:    pxor %xmm1, %xmm1
371; SSE2-NEXT:    movdqa %xmm0, %xmm2
372; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
373; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm2[3,2,1,0,4,5,6,7]
374; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
375; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4]
376; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
377; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm0[3,2,1,0,4,5,6,7]
378; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm2[0,1,2,3,7,6,5,4]
379; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
380; SSE2-NEXT:    packuswb %xmm1, %xmm0
381; SSE2-NEXT:    retq
382;
383; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
384; SSSE3:       # %bb.0:
385; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
386; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
387; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
388; SSSE3-NEXT:    retq
389;
390; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
391; SSE41:       # %bb.0:
392; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
393; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
394; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
395; SSE41-NEXT:    retq
396;
397; AVX-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
398; AVX:       # %bb.0:
399; AVX-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
400; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
401; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
402; AVX-NEXT:    retq
403  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
404  ret <16 x i8> %shuffle
405}
406
; In-place byte blend: even result lanes keep the same-numbered byte of %a,
; odd lanes take the same-numbered byte of %b (index 16+i selects %b[i]).
; The SSE2 run expects an and/andn/or bit-select with a 255/0 byte mask. The
; SSSE3 run expects two pshufb compactions merged by punpcklbw. The SSE4.1 and
; AVX1/AVX2 runs expect (v)pblendvb with the same 255/0 mask. The AVX-512VL
; runs expect a k-masked vmovdqu8 with immediate 0xAAAA, whose set bits are
; exactly the odd lanes (the lanes sourced from %b).
407define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i8> %a, <16 x i8> %b) {
408; SSE2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
409; SSE2:       # %bb.0:
410; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
411; SSE2-NEXT:    andps %xmm2, %xmm0
412; SSE2-NEXT:    andnps %xmm1, %xmm2
413; SSE2-NEXT:    orps %xmm2, %xmm0
414; SSE2-NEXT:    retq
415;
416; SSSE3-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
417; SSSE3:       # %bb.0:
418; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
419; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
420; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
421; SSSE3-NEXT:    retq
422;
423; SSE41-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
424; SSE41:       # %bb.0:
425; SSE41-NEXT:    movdqa %xmm0, %xmm2
426; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
427; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
428; SSE41-NEXT:    movdqa %xmm1, %xmm0
429; SSE41-NEXT:    retq
430;
431; AVX1OR2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
432; AVX1OR2:       # %bb.0:
433; AVX1OR2-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
434; AVX1OR2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
435; AVX1OR2-NEXT:    retq
436;
437; AVX512VL-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
438; AVX512VL:       # %bb.0:
439; AVX512VL-NEXT:    movw $-21846, %ax # imm = 0xAAAA
440; AVX512VL-NEXT:    kmovd %eax, %k1
441; AVX512VL-NEXT:    vmovdqu8 %xmm1, %xmm0 {%k1}
442; AVX512VL-NEXT:    retq
443  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
444  ret <16 x i8> %shuffle
445}
446
; In-place byte blend: lanes 3, 7, 11 and 15 take the same-numbered byte of %b
; (indices 19, 23, 27, 31); all other lanes keep the byte of %a.
; The SSE2 run expects an and/andn/or bit-select. The SSSE3 run expects two
; zeroing pshufb ops merged with por. The SSE4.1 and AVX1/AVX2 runs expect
; (v)pblendvb. The AVX-512VL runs expect a k-masked vmovdqu8 with immediate
; 0x8888, whose set bits are exactly lanes 3, 7, 11 and 15.
447define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(<16 x i8> %a, <16 x i8> %b) {
448; SSE2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
449; SSE2:       # %bb.0:
450; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
451; SSE2-NEXT:    andps %xmm2, %xmm0
452; SSE2-NEXT:    andnps %xmm1, %xmm2
453; SSE2-NEXT:    orps %xmm2, %xmm0
454; SSE2-NEXT:    retq
455;
456; SSSE3-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
457; SSSE3:       # %bb.0:
458; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[15]
459; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[4,5,6],zero,xmm0[8,9,10],zero,xmm0[12,13,14],zero
460; SSSE3-NEXT:    por %xmm1, %xmm0
461; SSSE3-NEXT:    retq
462;
463; SSE41-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
464; SSE41:       # %bb.0:
465; SSE41-NEXT:    movdqa %xmm0, %xmm2
466; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
467; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
468; SSE41-NEXT:    movdqa %xmm1, %xmm0
469; SSE41-NEXT:    retq
470;
471; AVX1OR2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
472; AVX1OR2:       # %bb.0:
473; AVX1OR2-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
474; AVX1OR2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
475; AVX1OR2-NEXT:    retq
476;
477; AVX512VL-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
478; AVX512VL:       # %bb.0:
479; AVX512VL-NEXT:    movw $-30584, %ax # imm = 0x8888
480; AVX512VL-NEXT:    kmovd %eax, %k1
481; AVX512VL-NEXT:    vmovdqu8 %xmm1, %xmm0 {%k1}
482; AVX512VL-NEXT:    retq
483  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
484  ret <16 x i8> %shuffle
485}
486
; Blend with zero: the second shuffle operand is zeroinitializer, so lanes 3,
; 7, 11 and 15 (mask indices 19, 23, 27, 31) become zero -- the "zz" entries in
; the function name -- while every other lane keeps the byte of %a.
; Every run expects this to fold to a single (v)andps with a constant loaded
; through a RIP-relative memory operand.
487define <16 x i8> @shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz(<16 x i8> %a) {
488; SSE-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
489; SSE:       # %bb.0:
490; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
491; SSE-NEXT:    retq
492;
493; AVX-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
494; AVX:       # %bb.0:
495; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
496; AVX-NEXT:    retq
497  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
498  ret <16 x i8> %shuffle
499}
500
; In-place byte blend with an irregular pattern: lanes 4, 7, 12 and 15 take the
; same-numbered byte of %b (indices 20, 23, 28, 31); all other lanes keep the
; byte of %a.
; The SSE2 run expects an and/andn/or bit-select. The SSSE3 run expects two
; zeroing pshufb ops merged with por. The SSE4.1 and AVX1/AVX2 runs expect
; (v)pblendvb. The AVX-512VL runs expect a k-masked vmovdqu8 with immediate
; 0x9090, whose set bits are exactly lanes 4, 7, 12 and 15.
501define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(<16 x i8> %a, <16 x i8> %b) {
502; SSE2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
503; SSE2:       # %bb.0:
504; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
505; SSE2-NEXT:    andps %xmm2, %xmm0
506; SSE2-NEXT:    andnps %xmm1, %xmm2
507; SSE2-NEXT:    orps %xmm2, %xmm0
508; SSE2-NEXT:    retq
509;
510; SSSE3-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
511; SSSE3:       # %bb.0:
512; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[4],zero,zero,xmm1[7],zero,zero,zero,zero,xmm1[12],zero,zero,xmm1[15]
513; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,xmm0[5,6],zero,xmm0[8,9,10,11],zero,xmm0[13,14],zero
514; SSSE3-NEXT:    por %xmm1, %xmm0
515; SSSE3-NEXT:    retq
516;
517; SSE41-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
518; SSE41:       # %bb.0:
519; SSE41-NEXT:    movdqa %xmm0, %xmm2
520; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
521; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
522; SSE41-NEXT:    movdqa %xmm1, %xmm0
523; SSE41-NEXT:    retq
524;
525; AVX1OR2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
526; AVX1OR2:       # %bb.0:
527; AVX1OR2-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
528; AVX1OR2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
529; AVX1OR2-NEXT:    retq
530;
531; AVX512VL-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
532; AVX512VL:       # %bb.0:
533; AVX512VL-NEXT:    movw $-28528, %ax # imm = 0x9090
534; AVX512VL-NEXT:    kmovd %eax, %k1
535; AVX512VL-NEXT:    vmovdqu8 %xmm1, %xmm0 {%k1}
536; AVX512VL-NEXT:    retq
537  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 13, i32 14, i32 31>
538  ret <16 x i8> %shuffle
539}
540
; In-place byte blend mixing dword, word and byte granularity: lanes 0-3, 8, 9,
; 12 and 14 take the same-numbered byte of %b (indices 16-19, 24, 25, 28, 30);
; lanes 4-7, 10, 11, 13 and 15 keep the byte of %a.
; The SSE2 run expects an and/andn/or bit-select. The SSSE3 run expects two
; zeroing pshufb ops merged with por. The SSE4.1 and AVX1/AVX2 runs expect
; (v)pblendvb. The AVX-512VL runs expect a k-masked vpblendmb with immediate
; 0xACF0, whose set bits are exactly lanes 4-7, 10, 11, 13 and 15 (the lanes
; whose mask index selects %a).
541define <16 x i8> @shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(<16 x i8> %a, <16 x i8> %b) {
542; SSE2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
543; SSE2:       # %bb.0:
544; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
545; SSE2-NEXT:    andps %xmm2, %xmm1
546; SSE2-NEXT:    andnps %xmm0, %xmm2
547; SSE2-NEXT:    orps %xmm1, %xmm2
548; SSE2-NEXT:    movaps %xmm2, %xmm0
549; SSE2-NEXT:    retq
550;
551; SSSE3-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
552; SSSE3:       # %bb.0:
553; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,xmm0[10,11],zero,xmm0[13],zero,xmm0[15]
554; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[8,9],zero,zero,xmm1[12],zero,xmm1[14],zero
555; SSSE3-NEXT:    por %xmm1, %xmm0
556; SSSE3-NEXT:    retq
557;
558; SSE41-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
559; SSE41:       # %bb.0:
560; SSE41-NEXT:    movdqa %xmm0, %xmm2
561; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
562; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
563; SSE41-NEXT:    movdqa %xmm2, %xmm0
564; SSE41-NEXT:    retq
565;
566; AVX1OR2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
567; AVX1OR2:       # %bb.0:
568; AVX1OR2-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
569; AVX1OR2-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
570; AVX1OR2-NEXT:    retq
571;
572; AVX512VL-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
573; AVX512VL:       # %bb.0:
574; AVX512VL-NEXT:    movw $-21264, %ax # imm = 0xACF0
575; AVX512VL-NEXT:    kmovd %eax, %k1
576; AVX512VL-NEXT:    vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
577; AVX512VL-NEXT:    retq
578  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 13, i32 30, i32 15>
579  ret <16 x i8> %shuffle
580}
581
; Gathers bytes 0, 4, 8 and 12 of %a into result bytes 0-3; lanes 4-15 are
; undef. With SSSE3 or later the checks expect a single (v)pshufb, while the
; baseline SSE2 run masks with pand and packs twice with packuswb.
582define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
583; SSE2-LABEL: trunc_v4i32_shuffle:
584; SSE2:       # %bb.0:
585; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
586; SSE2-NEXT:    packuswb %xmm0, %xmm0
587; SSE2-NEXT:    packuswb %xmm0, %xmm0
588; SSE2-NEXT:    retq
589;
590; SSSE3-LABEL: trunc_v4i32_shuffle:
591; SSSE3:       # %bb.0:
592; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
593; SSSE3-NEXT:    retq
594;
595; SSE41-LABEL: trunc_v4i32_shuffle:
596; SSE41:       # %bb.0:
597; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
598; SSE41-NEXT:    retq
599;
600; AVX-LABEL: trunc_v4i32_shuffle:
601; AVX:       # %bb.0:
602; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
603; AVX-NEXT:    retq
604  %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
605  ret <16 x i8> %shuffle
606}
607
; Codegen survival test built from ten chained shufflevectors with irregular
; masks. Of the nine arguments only %s.0.3, %s.0.4, %s.0.5, %s.0.6, %s.0.8 and
; %s.0.9 are referenced; %s.6.1 shuffles %s.3.2 with itself.
608define <16 x i8> @stress_test0(<16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i8> %s.0.7, <16 x i8> %s.0.8, <16 x i8> %s.0.9) {
609; We don't have anything useful to check here. This generates 100s of
610; instructions. Instead, just make sure we survived codegen.
611; ALL-LABEL: stress_test0:
612; ALL:         retq
613entry:
614  %s.1.4 = shufflevector <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i32> <i32 1, i32 22, i32 21, i32 28, i32 3, i32 16, i32 6, i32 1, i32 19, i32 29, i32 12, i32 31, i32 2, i32 3, i32 3, i32 6>
615  %s.1.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i32> <i32 31, i32 20, i32 12, i32 19, i32 2, i32 15, i32 12, i32 31, i32 2, i32 28, i32 2, i32 30, i32 7, i32 8, i32 17, i32 28>
616  %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> %s.0.9, <16 x i32> <i32 14, i32 10, i32 17, i32 5, i32 17, i32 9, i32 17, i32 21, i32 31, i32 24, i32 16, i32 6, i32 20, i32 28, i32 23, i32 8>
617  %s.2.2 = shufflevector <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i32> <i32 20, i32 9, i32 21, i32 11, i32 11, i32 4, i32 3, i32 18, i32 3, i32 30, i32 4, i32 31, i32 11, i32 24, i32 13, i32 29>
618  %s.3.2 = shufflevector <16 x i8> %s.2.2, <16 x i8> %s.1.4, <16 x i32> <i32 15, i32 13, i32 5, i32 11, i32 7, i32 17, i32 14, i32 22, i32 22, i32 16, i32 7, i32 24, i32 16, i32 22, i32 7, i32 29>
619  %s.5.4 = shufflevector <16 x i8> %s.1.5, <16 x i8> %s.1.8, <16 x i32> <i32 3, i32 13, i32 19, i32 7, i32 23, i32 11, i32 1, i32 9, i32 16, i32 25, i32 2, i32 7, i32 0, i32 21, i32 23, i32 17>
620  %s.6.1 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.3.2, <16 x i32> <i32 11, i32 2, i32 28, i32 31, i32 27, i32 3, i32 9, i32 27, i32 25, i32 25, i32 14, i32 7, i32 12, i32 28, i32 12, i32 23>
621  %s.7.1 = shufflevector <16 x i8> %s.6.1, <16 x i8> %s.3.2, <16 x i32> <i32 15, i32 29, i32 14, i32 0, i32 29, i32 15, i32 26, i32 30, i32 6, i32 7, i32 2, i32 8, i32 12, i32 10, i32 29, i32 17>
622  %s.7.2 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.5.4, <16 x i32> <i32 3, i32 29, i32 3, i32 19, i32 undef, i32 20, i32 undef, i32 3, i32 27, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
623  %s.16.0 = shufflevector <16 x i8> %s.7.1, <16 x i8> %s.7.2, <16 x i32> <i32 13, i32 1, i32 16, i32 16, i32 6, i32 7, i32 29, i32 18, i32 19, i32 28, i32 undef, i32 undef, i32 31, i32 1, i32 undef, i32 10>
624  ret <16 x i8> %s.16.0
625}
626
; Every lane of the returned value is undef. %s.12.4 reads only lane 13 of
; %s.10.4, and %s.10.4 defines only its lane 1, so the checks expect nothing
; but a retq.
627define <16 x i8> @undef_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind {
628; There is nothing interesting to check about these instructions other than
629; that they survive codegen. However, we actually do better and delete all of
630; them because the result is 'undef'.
631;
632; ALL-LABEL: undef_test1:
633; ALL:       # %bb.0: # %entry
634; ALL-NEXT:    retq
635entry:
636  %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> undef, <16 x i32> <i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 6, i32 undef, i32 6, i32 undef, i32 14, i32 14, i32 undef, i32 undef, i32 0>
637  %s.2.4 = shufflevector <16 x i8> undef, <16 x i8> %s.0.5, <16 x i32> <i32 21, i32 undef, i32 undef, i32 19, i32 undef, i32 undef, i32 29, i32 24, i32 21, i32 23, i32 21, i32 17, i32 19, i32 undef, i32 20, i32 22>
638  %s.2.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> undef, <16 x i32> <i32 3, i32 8, i32 undef, i32 7, i32 undef, i32 10, i32 8, i32 0, i32 15, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 9>
639  %s.2.9 = shufflevector <16 x i8> %s.0.9, <16 x i8> undef, <16 x i32> <i32 7, i32 undef, i32 14, i32 7, i32 8, i32 undef, i32 7, i32 8, i32 5, i32 15, i32 undef, i32 1, i32 11, i32 undef, i32 undef, i32 11>
640  %s.3.4 = shufflevector <16 x i8> %s.2.4, <16 x i8> %s.0.5, <16 x i32> <i32 5, i32 0, i32 21, i32 6, i32 15, i32 27, i32 22, i32 21, i32 4, i32 22, i32 19, i32 26, i32 9, i32 26, i32 8, i32 29>
641  %s.3.9 = shufflevector <16 x i8> %s.2.9, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 8, i32 1, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 undef>
642  %s.4.7 = shufflevector <16 x i8> %s.1.8, <16 x i8> %s.2.9, <16 x i32> <i32 9, i32 0, i32 22, i32 20, i32 24, i32 7, i32 21, i32 17, i32 20, i32 12, i32 19, i32 23, i32 2, i32 9, i32 17, i32 10>
643  %s.4.8 = shufflevector <16 x i8> %s.2.9, <16 x i8> %s.3.9, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 10, i32 undef, i32 0, i32 5, i32 undef, i32 9, i32 undef>
644  %s.5.7 = shufflevector <16 x i8> %s.4.7, <16 x i8> %s.4.8, <16 x i32> <i32 16, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
645  %s.8.4 = shufflevector <16 x i8> %s.3.4, <16 x i8> %s.5.7, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 28, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
646  %s.9.4 = shufflevector <16 x i8> %s.8.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 10, i32 5>
647  %s.10.4 = shufflevector <16 x i8> %s.9.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
648  %s.12.4 = shufflevector <16 x i8> %s.10.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef>
649
650  ret <16 x i8> %s.12.4
651}
652
; Widens <8 x i8> %a to <16 x i8>. Lanes 0-7 copy %a; lanes 8-15 all use mask
; index 8, which is element 0 of the zeroinitializer operand, so they are zero.
653define <16 x i8> @PR20540(<8 x i8> %a) {
654; SSE2-LABEL: PR20540:
655; SSE2:       # %bb.0:
656; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
657; SSE2-NEXT:    packuswb %xmm0, %xmm0
658; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
659; SSE2-NEXT:    retq
660;
661; SSSE3-LABEL: PR20540:
662; SSSE3:       # %bb.0:
663; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
664; SSSE3-NEXT:    retq
665;
666; SSE41-LABEL: PR20540:
667; SSE41:       # %bb.0:
668; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
669; SSE41-NEXT:    retq
670;
671; AVX-LABEL: PR20540:
672; AVX:       # %bb.0:
673; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
674; AVX-NEXT:    retq
675  %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
676  ret <16 x i8> %shuffle
677}
678
; %i lands in result byte 0 (mask index 16 is element 0 of %a). Mask indices
; 1-15 read the zeroinitializer operand, so the other bytes are zero. The
; checks expect a zero-extending movzbl followed by (v)movd.
679define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
680; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
681; SSE:       # %bb.0:
682; SSE-NEXT:    movzbl %dil, %eax
683; SSE-NEXT:    movd %eax, %xmm0
684; SSE-NEXT:    retq
685;
686; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
687; AVX:       # %bb.0:
688; AVX-NEXT:    movzbl %dil, %eax
689; AVX-NEXT:    vmovd %eax, %xmm0
690; AVX-NEXT:    retq
691  %a = insertelement <16 x i8> undef, i8 %i, i32 0
692  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
693  ret <16 x i8> %shuffle
694}
695
; %i is inserted at element 0 of %a and placed in result byte 5 (mask index
; 16). All other lanes read element 0 of the zeroinitializer operand. Without
; pinsrb the checks expect shll $8 plus pinsrw $2, i.e. the high byte of word 2.
696define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
697; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
698; SSE2:       # %bb.0:
699; SSE2-NEXT:    shll $8, %edi
700; SSE2-NEXT:    pxor %xmm0, %xmm0
701; SSE2-NEXT:    pinsrw $2, %edi, %xmm0
702; SSE2-NEXT:    retq
703;
704; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
705; SSSE3:       # %bb.0:
706; SSSE3-NEXT:    shll $8, %edi
707; SSSE3-NEXT:    pxor %xmm0, %xmm0
708; SSSE3-NEXT:    pinsrw $2, %edi, %xmm0
709; SSSE3-NEXT:    retq
710;
711; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
712; SSE41:       # %bb.0:
713; SSE41-NEXT:    pxor %xmm0, %xmm0
714; SSE41-NEXT:    pinsrb $5, %edi, %xmm0
715; SSE41-NEXT:    retq
716;
717; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
718; AVX:       # %bb.0:
719; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
720; AVX-NEXT:    vpinsrb $5, %edi, %xmm0, %xmm0
721; AVX-NEXT:    retq
722  %a = insertelement <16 x i8> undef, i8 %i, i32 0
723  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
724  ret <16 x i8> %shuffle
725}
726
; %i (element 0 of %a, mask index 16) goes to result byte 15. Lanes 1, 2, 4 and
; 5 are undef; every other lane reads the zeroinitializer operand. Without
; pinsrb the checks expect shll $8 plus pinsrw $7, i.e. the high byte of word 7.
727define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
728; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
729; SSE2:       # %bb.0:
730; SSE2-NEXT:    shll $8, %edi
731; SSE2-NEXT:    pxor %xmm0, %xmm0
732; SSE2-NEXT:    pinsrw $7, %edi, %xmm0
733; SSE2-NEXT:    retq
734;
735; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
736; SSSE3:       # %bb.0:
737; SSSE3-NEXT:    shll $8, %edi
738; SSSE3-NEXT:    pxor %xmm0, %xmm0
739; SSSE3-NEXT:    pinsrw $7, %edi, %xmm0
740; SSSE3-NEXT:    retq
741;
742; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
743; SSE41:       # %bb.0:
744; SSE41-NEXT:    pxor %xmm0, %xmm0
745; SSE41-NEXT:    pinsrb $15, %edi, %xmm0
746; SSE41-NEXT:    retq
747;
748; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
749; AVX:       # %bb.0:
750; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
751; AVX-NEXT:    vpinsrb $15, %edi, %xmm0, %xmm0
752; AVX-NEXT:    retq
753  %a = insertelement <16 x i8> undef, i8 %i, i32 0
754  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
755  ret <16 x i8> %shuffle
756}
757
; %i is inserted at element 3 of %a; mask index 19 (16 + 3) moves it to result
; byte 2. Every other lane reads the zeroinitializer operand. Without pinsrb
; the checks expect movzbl plus pinsrw $1, i.e. the low byte of word 1.
758define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
759; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
760; SSE2:       # %bb.0:
761; SSE2-NEXT:    movzbl %dil, %eax
762; SSE2-NEXT:    pxor %xmm0, %xmm0
763; SSE2-NEXT:    pinsrw $1, %eax, %xmm0
764; SSE2-NEXT:    retq
765;
766; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
767; SSSE3:       # %bb.0:
768; SSSE3-NEXT:    movzbl %dil, %eax
769; SSSE3-NEXT:    pxor %xmm0, %xmm0
770; SSSE3-NEXT:    pinsrw $1, %eax, %xmm0
771; SSSE3-NEXT:    retq
772;
773; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
774; SSE41:       # %bb.0:
775; SSE41-NEXT:    pxor %xmm0, %xmm0
776; SSE41-NEXT:    pinsrb $2, %edi, %xmm0
777; SSE41-NEXT:    retq
778;
779; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
780; AVX:       # %bb.0:
781; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
782; AVX-NEXT:    vpinsrb $2, %edi, %xmm0, %xmm0
783; AVX-NEXT:    retq
784  %a = insertelement <16 x i8> undef, i8 %i, i32 3
785  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
786  ret <16 x i8> %shuffle
787}
788
; Lanes 0-11 are zero, lane 12 is %a[0] and lane 14 is %a[2]. Lanes 13 and 15
; are undef, and the checks expect a single 12-byte (v)pslldq.
789define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu(<16 x i8> %a) {
790; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
791; SSE:       # %bb.0:
792; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
793; SSE-NEXT:    retq
794;
795; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
796; AVX:       # %bb.0:
797; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
798; AVX-NEXT:    retq
799  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 undef, i32 18, i32 undef>
800  ret <16 x i8> %shuffle
801}
802
; Lanes 0, 2 and 3 are %a[12], %a[14] and %a[15]; lane 1 is undef and lanes
; 4-15 read element 0 of the zeroinitializer operand. The checks expect a
; single 12-byte (v)psrldq.
803define <16 x i8> @shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
804; SSE-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
805; SSE:       # %bb.0:
806; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
807; SSE-NEXT:    retq
808;
809; AVX-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
810; AVX:       # %bb.0:
811; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
812; AVX-NEXT:    retq
813  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 28, i32 undef, i32 30, i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
814  ret <16 x i8> %shuffle
815}
816
; Byte rotation across two inputs. Lane 0 is %b[15] and lanes 1-15 are
; %a[0..14]. From SSSE3 on the checks expect one (v)palignr; the baseline SSE2
; run uses psrldq + pslldq + por.
817define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
818; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
819; SSE2:       # %bb.0:
820; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
821; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
822; SSE2-NEXT:    por %xmm1, %xmm0
823; SSE2-NEXT:    retq
824;
825; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
826; SSSE3:       # %bb.0:
827; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
828; SSSE3-NEXT:    retq
829;
830; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
831; SSE41:       # %bb.0:
832; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
833; SSE41-NEXT:    retq
834;
835; AVX-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
836; AVX:       # %bb.0:
837; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
838; AVX-NEXT:    retq
839  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
840  ret <16 x i8> %shuffle
841}
842
; Single-input byte rotation. Lane 0 is %a[15] and lanes 1-15 are %a[0..14];
; %b is never referenced by the mask.
843define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
844; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
845; SSE2:       # %bb.0:
846; SSE2-NEXT:    movdqa %xmm0, %xmm1
847; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
848; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
849; SSE2-NEXT:    por %xmm1, %xmm0
850; SSE2-NEXT:    retq
851;
852; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
853; SSSE3:       # %bb.0:
854; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
855; SSSE3-NEXT:    retq
856;
857; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
858; SSE41:       # %bb.0:
859; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
860; SSE41-NEXT:    retq
861;
862; AVX-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
863; AVX:       # %bb.0:
864; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
865; AVX-NEXT:    retq
866  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
867  ret <16 x i8> %shuffle
868}
869
; Byte rotation across two inputs. Lanes 0-14 are %b[1..15] and lane 15 is
; %a[0]. From SSSE3 on the checks expect one (v)palignr.
870define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) {
871; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
872; SSE2:       # %bb.0:
873; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
874; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
875; SSE2-NEXT:    por %xmm1, %xmm0
876; SSE2-NEXT:    retq
877;
878; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
879; SSSE3:       # %bb.0:
880; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
881; SSSE3-NEXT:    retq
882;
883; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
884; SSE41:       # %bb.0:
885; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
886; SSE41-NEXT:    retq
887;
888; AVX-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
889; AVX:       # %bb.0:
890; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
891; AVX-NEXT:    retq
892  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
893  ret <16 x i8> %shuffle
894}
895
; Byte rotation across two inputs. Lanes 0-14 are %a[1..15] and lane 15 is
; %b[0]. The non-VEX palignr checks write xmm1 and then copy it to xmm0.
896define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) {
897; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
898; SSE2:       # %bb.0:
899; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
900; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
901; SSE2-NEXT:    por %xmm1, %xmm0
902; SSE2-NEXT:    retq
903;
904; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
905; SSSE3:       # %bb.0:
906; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
907; SSSE3-NEXT:    movdqa %xmm1, %xmm0
908; SSSE3-NEXT:    retq
909;
910; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
911; SSE41:       # %bb.0:
912; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
913; SSE41-NEXT:    movdqa %xmm1, %xmm0
914; SSE41-NEXT:    retq
915;
916; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
917; AVX:       # %bb.0:
918; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
919; AVX-NEXT:    retq
920  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
921  ret <16 x i8> %shuffle
922}
923
; Single-input byte rotation. Lanes 0-14 are %a[1..15] and lane 15 is %a[0];
; %b is never referenced by the mask.
924define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) {
925; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
926; SSE2:       # %bb.0:
927; SSE2-NEXT:    movdqa %xmm0, %xmm1
928; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
929; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
930; SSE2-NEXT:    por %xmm1, %xmm0
931; SSE2-NEXT:    retq
932;
933; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
934; SSSE3:       # %bb.0:
935; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
936; SSSE3-NEXT:    retq
937;
938; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
939; SSE41:       # %bb.0:
940; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
941; SSE41-NEXT:    retq
942;
943; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
944; AVX:       # %bb.0:
945; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
946; AVX-NEXT:    retq
947  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0>
948  ret <16 x i8> %shuffle
949}
950
; Byte rotation across two inputs. Lane 0 is %a[15] and lanes 1-15 are
; %b[0..14]. The non-VEX palignr checks write xmm1 and then copy it to xmm0.
951define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) {
952; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
953; SSE2:       # %bb.0:
954; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
955; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
956; SSE2-NEXT:    por %xmm1, %xmm0
957; SSE2-NEXT:    retq
958;
959; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
960; SSSE3:       # %bb.0:
961; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
962; SSSE3-NEXT:    movdqa %xmm1, %xmm0
963; SSSE3-NEXT:    retq
964;
965; SSE41-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
966; SSE41:       # %bb.0:
967; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
968; SSE41-NEXT:    movdqa %xmm1, %xmm0
969; SSE41-NEXT:    retq
970;
971; AVX-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
972; AVX:       # %bb.0:
973; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
974; AVX-NEXT:    retq
975  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
976  ret <16 x i8> %shuffle
977}
978
979; PR31151
; The mask equals a low-half byte interleave of %val1 and %val2 with the two
; middle 32-bit elements of the result exchanged. From SSSE3 on the checks
; expect exactly that pair, (v)punpcklbw followed by (v)pshufd [0,2,1,3].
980define <16 x i8> @shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23(<16 x i8> %val1, <16 x i8> %val2) {
981; SSE2-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
982; SSE2:       # %bb.0:
983; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
984; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,1,3]
985; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
986; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
987; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
988; SSE2-NEXT:    pand %xmm1, %xmm0
989; SSE2-NEXT:    pandn %xmm2, %xmm1
990; SSE2-NEXT:    por %xmm0, %xmm1
991; SSE2-NEXT:    movdqa %xmm1, %xmm0
992; SSE2-NEXT:    retq
993;
994; SSSE3-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
995; SSSE3:       # %bb.0:
996; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
997; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
998; SSSE3-NEXT:    retq
999;
1000; SSE41-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
1001; SSE41:       # %bb.0:
1002; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1003; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
1004; SSE41-NEXT:    retq
1005;
1006; AVX-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
1007; AVX:       # %bb.0:
1008; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1009; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
1010; AVX-NEXT:    retq
1011  %shuffle = shufflevector <16 x i8> %val1, <16 x i8> %val2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23>
1012  ret <16 x i8> %shuffle
1013}
1014
; Only lanes 0 and 8 are defined (%a[0] and %a[1]); all other lanes are undef
; and the zeroinitializer operand is never referenced. The SSE4.1 and AVX
; checks expect a byte-to-qword zero extension, (v)pmovzxbq.
1015define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) {
1016; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
1017; SSE2:       # %bb.0:
1018; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1019; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1020; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1021; SSE2-NEXT:    retq
1022;
1023; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
1024; SSSE3:       # %bb.0:
1025; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1026; SSSE3-NEXT:    retq
1027;
1028; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
1029; SSE41:       # %bb.0:
1030; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1031; SSE41-NEXT:    retq
1032;
1033; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
1034; AVX:       # %bb.0:
1035; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1036; AVX-NEXT:    retq
1037  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1038  ret <16 x i8> %shuffle
1039}
1040
; Lanes 0 and 8 are %a[0] and %a[1]; every other lane uses a mask index of 17
; or above and therefore reads the zeroinitializer operand. This is a
; byte-to-qword zero extension, matched by (v)pmovzxbq on SSE4.1 and AVX.
1041define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
1042; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
1043; SSE2:       # %bb.0:
1044; SSE2-NEXT:    pxor %xmm1, %xmm1
1045; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1046; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1047; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1048; SSE2-NEXT:    retq
1049;
1050; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
1051; SSSE3:       # %bb.0:
1052; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1053; SSSE3-NEXT:    retq
1054;
1055; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
1056; SSE41:       # %bb.0:
1057; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1058; SSE41-NEXT:    retq
1059;
1060; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
1061; AVX:       # %bb.0:
1062; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1063; AVX-NEXT:    retq
1064  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 1, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
1065  ret <16 x i8> %shuffle
1066}
1067
; Only lanes 0, 4, 8 and 12 are defined (%a[0..3]); all other lanes are undef
; and the zeroinitializer operand is never referenced. The SSE4.1 and AVX
; checks expect (v)pmovzxbd, while SSE2 and SSSE3 use two unpacks of %a.
1068define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(<16 x i8> %a) {
1069; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
1070; SSE2:       # %bb.0:
1071; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1072; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1073; SSE2-NEXT:    retq
1074;
1075; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
1076; SSSE3:       # %bb.0:
1077; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1078; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1079; SSSE3-NEXT:    retq
1080;
1081; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
1082; SSE41:       # %bb.0:
1083; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1084; SSE41-NEXT:    retq
1085;
1086; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
1087; AVX:       # %bb.0:
1088; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1089; AVX-NEXT:    retq
1090  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
1091  ret <16 x i8> %shuffle
1092}
1093
; Lanes 0, 4, 8 and 12 are %a[0..3]; every other lane uses a mask index of 17
; or above and therefore reads the zeroinitializer operand. This is a
; byte-to-dword zero extension, matched by (v)pmovzxbd on SSE4.1 and AVX and
; by two unpacks against a zeroed register on SSE2 and SSSE3.
1094define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(<16 x i8> %a) {
1095; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
1096; SSE2:       # %bb.0:
1097; SSE2-NEXT:    pxor %xmm1, %xmm1
1098; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1099; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1100; SSE2-NEXT:    retq
1101;
1102; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
1103; SSSE3:       # %bb.0:
1104; SSSE3-NEXT:    pxor %xmm1, %xmm1
1105; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1106; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1107; SSSE3-NEXT:    retq
1108;
1109; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
1110; SSE41:       # %bb.0:
1111; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1112; SSE41-NEXT:    retq
1113;
1114; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
1115; AVX:       # %bb.0:
1116; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1117; AVX-NEXT:    retq
1118  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
1119  ret <16 x i8> %shuffle
1120}
1121
; Bytes 0-7 of %a are placed in the even result lanes; every odd lane is
; undef, so the lowering may fill those lanes with anything. The checks
; expect a self-unpack (punpcklbw of xmm0 with itself) on SSE2 and SSSE3, and
; pmovzxbw on SSE4.1 and later.
1122define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(<16 x i8> %a) {
1123; SSE2-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
1124; SSE2:       # %bb.0:
1125; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1126; SSE2-NEXT:    retq
1127;
1128; SSSE3-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
1129; SSSE3:       # %bb.0:
1130; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1131; SSSE3-NEXT:    retq
1132;
1133; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
1134; SSE41:       # %bb.0:
1135; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1136; SSE41-NEXT:    retq
1137;
1138; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
1139; AVX:       # %bb.0:
1140; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1141; AVX-NEXT:    retq
1142  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
1143  ret <16 x i8> %shuffle
1144}
1145
; Bytes 0-7 of %a are interleaved with bytes from the zeroinitializer operand
; (odd mask indices 17..31), which is a byte-to-word zero extension of the low
; eight bytes. The checks expect pxor + punpcklbw on SSE2 and SSSE3, and a
; single pmovzxbw on SSE4.1 and later.
1146define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(<16 x i8> %a) {
1147; SSE2-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
1148; SSE2:       # %bb.0:
1149; SSE2-NEXT:    pxor %xmm1, %xmm1
1150; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1151; SSE2-NEXT:    retq
1152;
1153; SSSE3-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
1154; SSSE3:       # %bb.0:
1155; SSSE3-NEXT:    pxor %xmm1, %xmm1
1156; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1157; SSSE3-NEXT:    retq
1158;
1159; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
1160; SSE41:       # %bb.0:
1161; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1162; SSE41-NEXT:    retq
1163;
1164; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
1165; AVX:       # %bb.0:
1166; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1167; AVX-NEXT:    retq
1168  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
1169  ret <16 x i8> %shuffle
1170}
1171
; Irregular two-input byte shuffle. Lane 0 is undef; mask indices below 16
; select from %a and the indices 22, 18 and 18 select bytes 6, 2 and 2 of %b.
; The checks expect one pshufb per input followed by an OR wherever pshufb is
; available, a single vpermt2b with a constant index vector when VBMI is
; enabled, and a long unpack/shuffle/mask sequence on plain SSE2.
1172define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(<16 x i8> %a, <16 x i8> %b) {
1173; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1174; SSE2:       # %bb.0: # %entry
1175; SSE2-NEXT:    pxor %xmm2, %xmm2
1176; SSE2-NEXT:    movdqa %xmm0, %xmm3
1177; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
1178; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm3[1,3,2,0,4,5,6,7]
1179; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,0,2,1]
1180; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [65535,65535,65535,0,65535,0,0,65535]
1181; SSE2-NEXT:    pand %xmm5, %xmm4
1182; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
1183; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,3,0,1]
1184; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,2,2,4,5,6,7]
1185; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
1186; SSE2-NEXT:    pandn %xmm2, %xmm5
1187; SSE2-NEXT:    por %xmm4, %xmm5
1188; SSE2-NEXT:    psrlq $16, %xmm0
1189; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
1190; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[3,1,1,3]
1191; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1192; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,4]
1193; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
1194; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
1195; SSE2-NEXT:    packuswb %xmm5, %xmm2
1196; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
1197; SSE2-NEXT:    pand %xmm0, %xmm2
1198; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,1,1,3,4,5,6,7]
1199; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1200; SSE2-NEXT:    pandn %xmm1, %xmm0
1201; SSE2-NEXT:    por %xmm2, %xmm0
1202; SSE2-NEXT:    retq
1203;
1204; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1205; SSSE3:       # %bb.0: # %entry
1206; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
1207; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
1208; SSSE3-NEXT:    por %xmm1, %xmm0
1209; SSSE3-NEXT:    retq
1210;
1211; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1212; SSE41:       # %bb.0: # %entry
1213; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
1214; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
1215; SSE41-NEXT:    por %xmm1, %xmm0
1216; SSE41-NEXT:    retq
1217;
1218; AVX1OR2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1219; AVX1OR2:       # %bb.0: # %entry
1220; AVX1OR2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
1221; AVX1OR2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
1222; AVX1OR2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1223; AVX1OR2-NEXT:    retq
1224;
1225; AVX512VLBW-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1226; AVX512VLBW:       # %bb.0: # %entry
1227; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
1228; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
1229; AVX512VLBW-NEXT:    vpor %xmm1, %xmm0, %xmm0
1230; AVX512VLBW-NEXT:    retq
1231;
1232; AVX512VLVBMI-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1233; AVX512VLVBMI:       # %bb.0: # %entry
1234; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm2 = <u,10,2,7,22,14,7,2,18,3,1,14,18,9,11,0>
1235; AVX512VLVBMI-NEXT:    vpermt2b %xmm1, %xmm2, %xmm0
1236; AVX512VLVBMI-NEXT:    retq
1237entry:
1238  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 undef, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0>
1239
1240  ret <16 x i8> %shuffle
1241}
1242
; Each <8 x i16> input is logically shifted right by 8, so the upper byte of
; every i16 lane becomes zero. The shuffle then gathers the even bytes of both
; shifted values (indices 0..14 from the first, 16..30 from the second).
; The checks expect this to lower to two word shifts feeding one packuswb.
; NOTE(review): the function name is spelled "shuffe" (sic); it is left
; unchanged because the label checks below match on that exact spelling.
1243define <16 x i8> @shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30(<8 x i16> %a0, <8 x i16> %a1) {
1244; SSE-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
1245; SSE:       # %bb.0:
1246; SSE-NEXT:    psrlw $8, %xmm0
1247; SSE-NEXT:    psrlw $8, %xmm1
1248; SSE-NEXT:    packuswb %xmm1, %xmm0
1249; SSE-NEXT:    retq
1250;
1251; AVX-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
1252; AVX:       # %bb.0:
1253; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
1254; AVX-NEXT:    vpsrlw $8, %xmm1, %xmm1
1255; AVX-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
1256; AVX-NEXT:    retq
1257  %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
1258  %2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
1259  %3 = bitcast <8 x i16> %1 to <16 x i8>
1260  %4 = bitcast <8 x i16> %2 to <16 x i8>
1261  %5 = shufflevector <16 x i8> %3, <16 x i8> %4, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1262  ret <16 x i8> %5
1263}
1264
; Chains three two-input byte shuffles with irregular masks (the last one has
; an undef lane). Only the function label and a final retq are checked, so the
; generated instruction sequence itself is not pinned down.
1265define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) {
1266; Nothing interesting to test here. Just make sure we didn't crash.
1267; ALL-LABEL: stress_test2:
1268; ALL:         retq
1269entry:
1270  %s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> <i32 29, i32 30, i32 2, i32 16, i32 26, i32 21, i32 11, i32 26, i32 26, i32 3, i32 4, i32 5, i32 30, i32 28, i32 15, i32 5>
1271  %s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> <i32 31, i32 1, i32 24, i32 12, i32 28, i32 5, i32 2, i32 9, i32 29, i32 1, i32 31, i32 5, i32 6, i32 17, i32 15, i32 22>
1272  %s.2.0 = shufflevector <16 x i8> %s.1.0, <16 x i8> %s.1.1, <16 x i32> <i32 22, i32 1, i32 12, i32 3, i32 30, i32 4, i32 30, i32 undef, i32 1, i32 10, i32 14, i32 18, i32 27, i32 13, i32 16, i32 19>
1273
1274  ret <16 x i8> %s.2.0
1275}
1276
; Shuffles a constant vector (lanes 0-11 undef, lanes 12-15 zero) with a zero
; vector produced by a bitcast. The mask picks lanes 12-15 of the first
; operand and lanes 0-11 of the second, so every selected byte is zero.
; The checks expect both stores to reuse a single zeroed register
; (one xorps followed by two aligned stores).
1277define void @constant_gets_selected(<4 x i32>* %ptr1, <4 x i32>* %ptr2) {
1278; SSE-LABEL: constant_gets_selected:
1279; SSE:       # %bb.0: # %entry
1280; SSE-NEXT:    xorps %xmm0, %xmm0
1281; SSE-NEXT:    movaps %xmm0, (%rdi)
1282; SSE-NEXT:    movaps %xmm0, (%rsi)
1283; SSE-NEXT:    retq
1284;
1285; AVX-LABEL: constant_gets_selected:
1286; AVX:       # %bb.0: # %entry
1287; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
1288; AVX-NEXT:    vmovaps %xmm0, (%rdi)
1289; AVX-NEXT:    vmovaps %xmm0, (%rsi)
1290; AVX-NEXT:    retq
1291entry:
1292  %weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8>
1293  %shuffle.i = shufflevector <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %weird_zero, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
1294  %weirder_zero = bitcast <16 x i8> %shuffle.i to <4 x i32>
1295  store <4 x i32> %weirder_zero, <4 x i32>* %ptr1, align 16
1296  store <4 x i32> zeroinitializer, <4 x i32>* %ptr2, align 16
1297  ret void
1298}
1299
1300;
1301; Shuffle to logical bit shifts
1302;
1303
; Every even lane takes a zero byte (mask index 16 selects from the
; zeroinitializer operand) and every odd lane takes the even-numbered byte of
; %a just below it. That equals shifting each 16-bit lane left by 8 bits,
; which is the single psllw $8 the checks expect. %b is unused.
1304define <16 x i8> @shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i8> %a, <16 x i8> %b) {
1305; SSE-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
1306; SSE:       # %bb.0:
1307; SSE-NEXT:    psllw $8, %xmm0
1308; SSE-NEXT:    retq
1309;
1310; AVX-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
1311; AVX:       # %bb.0:
1312; AVX-NEXT:    vpsllw $8, %xmm0, %xmm0
1313; AVX-NEXT:    retq
1314  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
1315  ret <16 x i8> %shuffle
1316}
1317
; Within each group of four lanes the first three are zero (mask index 16)
; and the last holds the lowest byte of the matching dword of %a (bytes 0, 4,
; 8, 12). That equals shifting each 32-bit lane left by 24 bits, which is the
; single pslld $24 the checks expect. %b is unused.
1318define <16 x i8> @shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i8> %a, <16 x i8> %b) {
1319; SSE-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
1320; SSE:       # %bb.0:
1321; SSE-NEXT:    pslld $24, %xmm0
1322; SSE-NEXT:    retq
1323;
1324; AVX-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
1325; AVX:       # %bb.0:
1326; AVX-NEXT:    vpslld $24, %xmm0, %xmm0
1327; AVX-NEXT:    retq
1328  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
1329  ret <16 x i8> %shuffle
1330}
1331
; Within each group of eight lanes the first seven are zero (mask index 16)
; and the last holds the lowest byte of the matching qword of %a (bytes 0 and
; 8). That equals shifting each 64-bit lane left by 56 bits, which is the
; single psllq $56 the checks expect. %b is unused.
1332define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08(<16 x i8> %a, <16 x i8> %b) {
1333; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
1334; SSE:       # %bb.0:
1335; SSE-NEXT:    psllq $56, %xmm0
1336; SSE-NEXT:    retq
1337;
1338; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
1339; AVX:       # %bb.0:
1340; AVX-NEXT:    vpsllq $56, %xmm0, %xmm0
1341; AVX-NEXT:    retq
1342  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8>
1343  ret <16 x i8> %shuffle
1344}
1345
; In each 8-lane half the first lane is zero and the defined lanes hold the
; byte of %a one position lower; the remaining lanes are undef. The undef
; lanes let the whole mask be matched as a one-byte left shift of each 64-bit
; lane, which is the single psllq $8 the checks expect. %b is unused.
1346define <16 x i8> @shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
1347; SSE-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
1348; SSE:       # %bb.0:
1349; SSE-NEXT:    psllq $8, %xmm0
1350; SSE-NEXT:    retq
1351;
1352; AVX-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
1353; AVX:       # %bb.0:
1354; AVX-NEXT:    vpsllq $8, %xmm0, %xmm0
1355; AVX-NEXT:    retq
1356  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 undef, i32 2, i32 3, i32 undef, i32 5, i32 6, i32 16, i32 8, i32 9, i32 undef, i32 11, i32 12, i32 13, i32 14>
1357  ret <16 x i8> %shuffle
1358}
1359
; The defined even lanes hold the odd byte of %a directly above them (1, 11,
; 13, 15) and the defined odd lanes are zero (mask index 16); the other lanes
; are undef. With the undef lanes filled in freely this is a right shift of
; each 16-bit lane by 8 bits, which is the single psrlw $8 the checks expect.
; %b is unused.
1360define <16 x i8> @shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz(<16 x i8> %a, <16 x i8> %b) {
1361; SSE-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
1362; SSE:       # %bb.0:
1363; SSE-NEXT:    psrlw $8, %xmm0
1364; SSE-NEXT:    retq
1365;
1366; AVX-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
1367; AVX:       # %bb.0:
1368; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
1369; AVX-NEXT:    retq
1370  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 undef, i32 16, i32 undef, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
1371  ret <16 x i8> %shuffle
1372}
1373
; In each defined 4-lane group the low two lanes hold the upper two bytes of
; the matching dword of %a and the high two lanes are zero; lanes 6-11 are
; undef. This matches a right shift of each 32-bit lane by 16 bits, which is
; the single psrld $16 the checks expect. %b is unused.
1374define <16 x i8> @shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz(<16 x i8> %a, <16 x i8> %b) {
1375; SSE-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
1376; SSE:       # %bb.0:
1377; SSE-NEXT:    psrld $16, %xmm0
1378; SSE-NEXT:    retq
1379;
1380; AVX-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
1381; AVX:       # %bb.0:
1382; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
1383; AVX-NEXT:    retq
1384  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 15, i32 16, i32 16>
1385  ret <16 x i8> %shuffle
1386}
1387
; Lane 0 takes byte 7 of %a and lane 8 takes byte 15, i.e. the top byte of
; each qword moves to the bottom; the other defined lanes are zero and the
; rest are undef. This matches a right shift of each 64-bit lane by 56 bits,
; which is the single psrlq $56 the checks expect. %b is unused.
1388define <16 x i8> @shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz(<16 x i8> %a, <16 x i8> %b) {
1389; SSE-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
1390; SSE:       # %bb.0:
1391; SSE-NEXT:    psrlq $56, %xmm0
1392; SSE-NEXT:    retq
1393;
1394; AVX-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
1395; AVX:       # %bb.0:
1396; AVX-NEXT:    vpsrlq $56, %xmm0, %xmm0
1397; AVX-NEXT:    retq
1398  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16>
1399  ret <16 x i8> %shuffle
1400}
1401
; Gathers the even-indexed bytes of both inputs (0..14 from %inval1, 16..30
; from %inval2), i.e. a truncation of each 16-bit lane to its low byte.
; The checks expect a mask-with-255 plus packuswb sequence on SSE2, and a
; pshufb of each input joined by punpcklqdq on SSSE3 and later.
; NOTE(review): presumably a regression test for the bug report with the same
; number as the function name - confirm against the bug tracker.
1402define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
1403; SSE2-LABEL: PR12412:
1404; SSE2:       # %bb.0: # %entry
1405; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1406; SSE2-NEXT:    pand %xmm2, %xmm1
1407; SSE2-NEXT:    pand %xmm2, %xmm0
1408; SSE2-NEXT:    packuswb %xmm1, %xmm0
1409; SSE2-NEXT:    retq
1410;
1411; SSSE3-LABEL: PR12412:
1412; SSSE3:       # %bb.0: # %entry
1413; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1414; SSSE3-NEXT:    pshufb %xmm2, %xmm1
1415; SSSE3-NEXT:    pshufb %xmm2, %xmm0
1416; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1417; SSSE3-NEXT:    retq
1418;
1419; SSE41-LABEL: PR12412:
1420; SSE41:       # %bb.0: # %entry
1421; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1422; SSE41-NEXT:    pshufb %xmm2, %xmm1
1423; SSE41-NEXT:    pshufb %xmm2, %xmm0
1424; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1425; SSE41-NEXT:    retq
1426;
1427; AVX-LABEL: PR12412:
1428; AVX:       # %bb.0: # %entry
1429; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1430; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1431; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1432; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1433; AVX-NEXT:    retq
1434entry:
1435  %0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1436  ret <16 x i8> %0
1437}
1438
; In each 4-lane group the first lane is undef, the middle two lanes hold the
; upper two bytes of the matching dword of %a, and the last lane is zero.
; Filling the undef lane with byte 1 of the dword makes this a right shift of
; each 32-bit lane by 8 bits, which is the single psrld $8 the checks expect.
1439define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(<16 x i8> %a) {
1440; SSE-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
1441; SSE:       # %bb.0:
1442; SSE-NEXT:    psrld $8, %xmm0
1443; SSE-NEXT:    retq
1444;
1445; AVX-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
1446; AVX:       # %bb.0:
1447; AVX-NEXT:    vpsrld $8, %xmm0, %xmm0
1448; AVX-NEXT:    retq
1449  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16, i32 undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16>
1450  ret <16 x i8> %shuffle
1451}
1452
; Three shuffles at different element widths are chained through bitcasts.
; First a reversed byte interleave of the low halves of %a and %b, then a
; reversal of the four 32-bit elements, then a swap of adjacent 16-bit
; elements. Composed together they yield a0,b0,a1,b1,...,a7,b7, so the checks
; expect the whole chain to fold into a single punpcklbw.
1453define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) {
1454; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
1455; SSE:       # %bb.0:
1456; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1457; SSE-NEXT:    retq
1458;
1459; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
1460; AVX:       # %bb.0:
1461; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1462; AVX-NEXT:    retq
1463  %shuffle8  = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32 2, i32 18, i32 1, i32 17, i32 0, i32 16>
1464  %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
1465  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1466  %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16>
1467  %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
1468  %bitcast8  = bitcast <8 x i16> %shuffle16 to <16 x i8>
1469  ret <16 x i8> %bitcast8
1470}
1471
; Loads an i32, inserts it into element 0 of a zero <4 x i32>, reinterprets
; the vector as 16 bytes and splats byte 0 to every lane. With AVX2 or
; AVX512VL the checks expect the load to fold into a vpbroadcastb from (%rdi);
; older targets load with movd and then splat with pshufb or an unpack and
; shuffle sequence.
1472define <16 x i8> @insert_dup_mem_v16i8_i32(i32* %ptr) {
1473; SSE2-LABEL: insert_dup_mem_v16i8_i32:
1474; SSE2:       # %bb.0:
1475; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1476; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1477; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
1478; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1479; SSE2-NEXT:    retq
1480;
1481; SSSE3-LABEL: insert_dup_mem_v16i8_i32:
1482; SSSE3:       # %bb.0:
1483; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1484; SSSE3-NEXT:    pxor %xmm1, %xmm1
1485; SSSE3-NEXT:    pshufb %xmm1, %xmm0
1486; SSSE3-NEXT:    retq
1487;
1488; SSE41-LABEL: insert_dup_mem_v16i8_i32:
1489; SSE41:       # %bb.0:
1490; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1491; SSE41-NEXT:    pxor %xmm1, %xmm1
1492; SSE41-NEXT:    pshufb %xmm1, %xmm0
1493; SSE41-NEXT:    retq
1494;
1495; AVX1-LABEL: insert_dup_mem_v16i8_i32:
1496; AVX1:       # %bb.0:
1497; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1498; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1499; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
1500; AVX1-NEXT:    retq
1501;
1502; AVX2OR512VL-LABEL: insert_dup_mem_v16i8_i32:
1503; AVX2OR512VL:       # %bb.0:
1504; AVX2OR512VL-NEXT:    vpbroadcastb (%rdi), %xmm0
1505; AVX2OR512VL-NEXT:    retq
1506  %tmp = load i32, i32* %ptr, align 4
1507  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
1508  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
1509  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
1510  ret <16 x i8> %tmp3
1511}
1512
; Loads an i8, sign-extends it to i32, inserts it into element 0 of a zero
; <4 x i32>, reinterprets the vector as 16 bytes and splats byte 0. Byte 0 of
; the extended value is the loaded byte itself, so with AVX2 or AVX512VL the
; checks expect a direct vpbroadcastb from (%rdi); older targets keep the
; movsbl + movd and then splat.
1513define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
1514; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8:
1515; SSE2:       # %bb.0:
1516; SSE2-NEXT:    movsbl (%rdi), %eax
1517; SSE2-NEXT:    movd %eax, %xmm0
1518; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1519; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
1520; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1521; SSE2-NEXT:    retq
1522;
1523; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8:
1524; SSSE3:       # %bb.0:
1525; SSSE3-NEXT:    movsbl (%rdi), %eax
1526; SSSE3-NEXT:    movd %eax, %xmm0
1527; SSSE3-NEXT:    pxor %xmm1, %xmm1
1528; SSSE3-NEXT:    pshufb %xmm1, %xmm0
1529; SSSE3-NEXT:    retq
1530;
1531; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8:
1532; SSE41:       # %bb.0:
1533; SSE41-NEXT:    movsbl (%rdi), %eax
1534; SSE41-NEXT:    movd %eax, %xmm0
1535; SSE41-NEXT:    pxor %xmm1, %xmm1
1536; SSE41-NEXT:    pshufb %xmm1, %xmm0
1537; SSE41-NEXT:    retq
1538;
1539; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8:
1540; AVX1:       # %bb.0:
1541; AVX1-NEXT:    movsbl (%rdi), %eax
1542; AVX1-NEXT:    vmovd %eax, %xmm0
1543; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1544; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
1545; AVX1-NEXT:    retq
1546;
1547; AVX2OR512VL-LABEL: insert_dup_mem_v16i8_sext_i8:
1548; AVX2OR512VL:       # %bb.0:
1549; AVX2OR512VL-NEXT:    vpbroadcastb (%rdi), %xmm0
1550; AVX2OR512VL-NEXT:    retq
1551  %tmp = load i8, i8* %ptr, align 1
1552  %tmp1 = sext i8 %tmp to i32
1553  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
1554  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
1555  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> zeroinitializer
1556  ret <16 x i8> %tmp4
1557}
1558
; Same shape as the byte-0 splat of a loaded i32, but the mask splats byte 1.
; With AVX2 or AVX512VL the checks expect the byte offset to fold into the
; address, giving vpbroadcastb from 1(%rdi); older targets load with movd and
; splat byte 1 with pshufb or an unpack and shuffle sequence.
1559define <16 x i8> @insert_dup_elt1_mem_v16i8_i32(i32* %ptr) {
1560; SSE2-LABEL: insert_dup_elt1_mem_v16i8_i32:
1561; SSE2:       # %bb.0:
1562; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1563; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1564; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
1565; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1566; SSE2-NEXT:    retq
1567;
1568; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_i32:
1569; SSSE3:       # %bb.0:
1570; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1571; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1572; SSSE3-NEXT:    retq
1573;
1574; SSE41-LABEL: insert_dup_elt1_mem_v16i8_i32:
1575; SSE41:       # %bb.0:
1576; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1577; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1578; SSE41-NEXT:    retq
1579;
1580; AVX1-LABEL: insert_dup_elt1_mem_v16i8_i32:
1581; AVX1:       # %bb.0:
1582; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1583; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1584; AVX1-NEXT:    retq
1585;
1586; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v16i8_i32:
1587; AVX2OR512VL:       # %bb.0:
1588; AVX2OR512VL-NEXT:    vpbroadcastb 1(%rdi), %xmm0
1589; AVX2OR512VL-NEXT:    retq
1590  %tmp = load i32, i32* %ptr, align 4
1591  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
1592  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
1593  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1594  ret <16 x i8> %tmp3
1595}
1596
; Same shape as the byte-0 splat of a loaded i32, but the mask splats byte 2.
; With AVX2 or AVX512VL the checks expect the byte offset to fold into the
; address, giving vpbroadcastb from 2(%rdi); older targets load with movd and
; splat byte 2 with pshufb or an unpack and shuffle sequence.
1597define <16 x i8> @insert_dup_elt2_mem_v16i8_i32(i32* %ptr) {
1598; SSE2-LABEL: insert_dup_elt2_mem_v16i8_i32:
1599; SSE2:       # %bb.0:
1600; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1601; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1602; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,3,4,5,6,7]
1603; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1604; SSE2-NEXT:    retq
1605;
1606; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_i32:
1607; SSSE3:       # %bb.0:
1608; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1609; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1610; SSSE3-NEXT:    retq
1611;
1612; SSE41-LABEL: insert_dup_elt2_mem_v16i8_i32:
1613; SSE41:       # %bb.0:
1614; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1615; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1616; SSE41-NEXT:    retq
1617;
1618; AVX1-LABEL: insert_dup_elt2_mem_v16i8_i32:
1619; AVX1:       # %bb.0:
1620; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1621; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1622; AVX1-NEXT:    retq
1623;
1624; AVX2OR512VL-LABEL: insert_dup_elt2_mem_v16i8_i32:
1625; AVX2OR512VL:       # %bb.0:
1626; AVX2OR512VL-NEXT:    vpbroadcastb 2(%rdi), %xmm0
1627; AVX2OR512VL-NEXT:    retq
1628  %tmp = load i32, i32* %ptr, align 4
1629  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
1630  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
1631  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
1632  ret <16 x i8> %tmp3
1633}
1634
; Loads an i8, sign-extends it to i32, inserts it into element 0 of a zero
; <4 x i32>, and splats byte 1 of the result. Byte 1 of the extended value is
; made of sign-bit copies rather than loaded data, so the checks do not expect
; a broadcast straight from memory. With AVX2 or AVX512VL they expect
; movsbl + shrl $8 followed by a register vpbroadcastb; older targets splat
; byte 1 after movsbl + movd.
1635define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) {
1636; SSE2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
1637; SSE2:       # %bb.0:
1638; SSE2-NEXT:    movsbl (%rdi), %eax
1639; SSE2-NEXT:    movd %eax, %xmm0
1640; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1641; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
1642; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1643; SSE2-NEXT:    retq
1644;
1645; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
1646; SSSE3:       # %bb.0:
1647; SSSE3-NEXT:    movsbl (%rdi), %eax
1648; SSSE3-NEXT:    movd %eax, %xmm0
1649; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1650; SSSE3-NEXT:    retq
1651;
1652; SSE41-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
1653; SSE41:       # %bb.0:
1654; SSE41-NEXT:    movsbl (%rdi), %eax
1655; SSE41-NEXT:    movd %eax, %xmm0
1656; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1657; SSE41-NEXT:    retq
1658;
1659; AVX1-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
1660; AVX1:       # %bb.0:
1661; AVX1-NEXT:    movsbl (%rdi), %eax
1662; AVX1-NEXT:    vmovd %eax, %xmm0
1663; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1664; AVX1-NEXT:    retq
1665;
1666; AVX2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
1667; AVX2:       # %bb.0:
1668; AVX2-NEXT:    movsbl (%rdi), %eax
1669; AVX2-NEXT:    shrl $8, %eax
1670; AVX2-NEXT:    vmovd %eax, %xmm0
1671; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
1672; AVX2-NEXT:    retq
1673;
1674; AVX512VL-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
1675; AVX512VL:       # %bb.0:
1676; AVX512VL-NEXT:    movsbl (%rdi), %eax
1677; AVX512VL-NEXT:    shrl $8, %eax
1678; AVX512VL-NEXT:    vpbroadcastb %eax, %xmm0
1679; AVX512VL-NEXT:    retq
1680  %tmp = load i8, i8* %ptr, align 1
1681  %tmp1 = sext i8 %tmp to i32
1682  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
1683  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
1684  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1685  ret <16 x i8> %tmp4
1686}
1687
; Loads an i8, sign-extends it to i32, inserts it into element 0 of a zero
; <4 x i32>, and splats byte 2 of the result. Byte 2 of the extended value is
; made of sign-bit copies rather than loaded data, so the checks do not expect
; a broadcast straight from memory. With AVX2 or AVX512VL they expect
; movsbl + shrl $16 followed by a register vpbroadcastb; older targets splat
; byte 2 after movsbl + movd.
1688define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) {
1689; SSE2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
1690; SSE2:       # %bb.0:
1691; SSE2-NEXT:    movsbl (%rdi), %eax
1692; SSE2-NEXT:    movd %eax, %xmm0
1693; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1694; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,3,4,5,6,7]
1695; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1696; SSE2-NEXT:    retq
1697;
1698; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
1699; SSSE3:       # %bb.0:
1700; SSSE3-NEXT:    movsbl (%rdi), %eax
1701; SSSE3-NEXT:    movd %eax, %xmm0
1702; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1703; SSSE3-NEXT:    retq
1704;
1705; SSE41-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
1706; SSE41:       # %bb.0:
1707; SSE41-NEXT:    movsbl (%rdi), %eax
1708; SSE41-NEXT:    movd %eax, %xmm0
1709; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1710; SSE41-NEXT:    retq
1711;
1712; AVX1-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
1713; AVX1:       # %bb.0:
1714; AVX1-NEXT:    movsbl (%rdi), %eax
1715; AVX1-NEXT:    vmovd %eax, %xmm0
1716; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1717; AVX1-NEXT:    retq
1718;
1719; AVX2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
1720; AVX2:       # %bb.0:
1721; AVX2-NEXT:    movsbl (%rdi), %eax
1722; AVX2-NEXT:    shrl $16, %eax
1723; AVX2-NEXT:    vmovd %eax, %xmm0
1724; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
1725; AVX2-NEXT:    retq
1726;
1727; AVX512VL-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
1728; AVX512VL:       # %bb.0:
1729; AVX512VL-NEXT:    movsbl (%rdi), %eax
1730; AVX512VL-NEXT:    shrl $16, %eax
1731; AVX512VL-NEXT:    vpbroadcastb %eax, %xmm0
1732; AVX512VL-NEXT:    retq
1733  %tmp = load i8, i8* %ptr, align 1
1734  %tmp1 = sext i8 %tmp to i32
1735  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
1736  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
1737  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
1738  ret <16 x i8> %tmp4
1739}
1740
; Builds a vector from two loaded bytes inserted at lanes 0 and 1 of a
; constant whose lane 3 is zero and whose other lanes are undef, then shuffles
; it with a mask that uses only lanes 1, 3 and 0. The pshufb-based checks
; expect lane 7 of the result (mask index 3) to be an explicit zero. SSE4.1
; and AVX build the source with pinsrb; SSE2 and SSSE3 combine the two bytes
; in a scalar register first.
; NOTE(review): presumably a regression test for the bug report with the same
; number as the function name - confirm against the bug tracker.
1741define <16 x i8> @PR31364(i8* nocapture readonly %a, i8* nocapture readonly %b) {
1742; SSE2-LABEL: PR31364:
1743; SSE2:       # %bb.0:
1744; SSE2-NEXT:    movzbl (%rdi), %eax
1745; SSE2-NEXT:    movzbl (%rsi), %ecx
1746; SSE2-NEXT:    shll $8, %ecx
1747; SSE2-NEXT:    orl %eax, %ecx
1748; SSE2-NEXT:    movzwl %cx, %eax
1749; SSE2-NEXT:    movd %eax, %xmm1
1750; SSE2-NEXT:    pxor %xmm0, %xmm0
1751; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1752; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[1,1,1,3,4,5,6,7]
1753; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
1754; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
1755; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
1756; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,4,4]
1757; SSE2-NEXT:    packuswb %xmm1, %xmm0
1758; SSE2-NEXT:    retq
1759;
1760; SSSE3-LABEL: PR31364:
1761; SSSE3:       # %bb.0:
1762; SSSE3-NEXT:    movzbl (%rdi), %eax
1763; SSSE3-NEXT:    movzbl (%rsi), %ecx
1764; SSSE3-NEXT:    shll $8, %ecx
1765; SSSE3-NEXT:    orl %eax, %ecx
1766; SSSE3-NEXT:    movzwl %cx, %eax
1767; SSSE3-NEXT:    movd %eax, %xmm0
1768; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
1769; SSSE3-NEXT:    retq
1770;
1771; SSE41-LABEL: PR31364:
1772; SSE41:       # %bb.0:
1773; SSE41-NEXT:    pxor %xmm0, %xmm0
1774; SSE41-NEXT:    pinsrb $0, (%rdi), %xmm0
1775; SSE41-NEXT:    pinsrb $1, (%rsi), %xmm0
1776; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
1777; SSE41-NEXT:    retq
1778;
1779; AVX-LABEL: PR31364:
1780; AVX:       # %bb.0:
1781; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1782; AVX-NEXT:    vpinsrb $0, (%rdi), %xmm0, %xmm0
1783; AVX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm0
1784; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
1785; AVX-NEXT:    retq
1786  %v0 = load i8, i8* %a, align 1
1787  %vecins = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %v0, i32 0
1788  %v1 = load i8, i8* %b, align 1
1789  %vecins2 = insertelement <16 x i8> %vecins, i8 %v1, i32 1
1790  %result = shufflevector <16 x i8> %vecins2, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 3, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0>
1791  ret <16 x i8> %result
1792}
1793
; PR31301 test case (the name suggests a regression test for that bug report;
; confirm against the tracker). The byte loaded from %x and the byte loaded
; from %y are each splatted across the low 8 lanes of a vector (the upper 8
; lanes are undef), and the two low halves are then interleaved byte by byte.
; The AVX2/AVX512VL expectations below use a vpbroadcastb directly from
; memory for each splat followed by one vpunpcklbw; the other configurations
; first move each byte into a vector register and splat it there.
1794define <16 x i8> @PR31301(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
1795; SSE2-LABEL: PR31301:
1796; SSE2:       # %bb.0: # %entry
1797; SSE2-NEXT:    movzbl (%rdi), %eax
1798; SSE2-NEXT:    movd %eax, %xmm0
1799; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1800; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
1801; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1802; SSE2-NEXT:    movzbl (%rsi), %eax
1803; SSE2-NEXT:    movd %eax, %xmm1
1804; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1805; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
1806; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1807; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1808; SSE2-NEXT:    retq
1809;
1810; SSSE3-LABEL: PR31301:
1811; SSSE3:       # %bb.0: # %entry
1812; SSSE3-NEXT:    movzbl (%rdi), %eax
1813; SSSE3-NEXT:    movd %eax, %xmm0
1814; SSSE3-NEXT:    pxor %xmm1, %xmm1
1815; SSSE3-NEXT:    pshufb %xmm1, %xmm0
1816; SSSE3-NEXT:    movzbl (%rsi), %eax
1817; SSSE3-NEXT:    movd %eax, %xmm2
1818; SSSE3-NEXT:    pshufb %xmm1, %xmm2
1819; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1820; SSSE3-NEXT:    retq
1821;
1822; SSE41-LABEL: PR31301:
1823; SSE41:       # %bb.0: # %entry
1824; SSE41-NEXT:    movzbl (%rdi), %eax
1825; SSE41-NEXT:    movd %eax, %xmm0
1826; SSE41-NEXT:    pxor %xmm1, %xmm1
1827; SSE41-NEXT:    pshufb %xmm1, %xmm0
1828; SSE41-NEXT:    movzbl (%rsi), %eax
1829; SSE41-NEXT:    movd %eax, %xmm2
1830; SSE41-NEXT:    pshufb %xmm1, %xmm2
1831; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1832; SSE41-NEXT:    retq
1833;
1834; AVX1-LABEL: PR31301:
1835; AVX1:       # %bb.0: # %entry
1836; AVX1-NEXT:    movzbl (%rdi), %eax
1837; AVX1-NEXT:    vmovd %eax, %xmm0
1838; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1839; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
1840; AVX1-NEXT:    movzbl (%rsi), %eax
1841; AVX1-NEXT:    vmovd %eax, %xmm2
1842; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm1
1843; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1844; AVX1-NEXT:    retq
1845;
1846; AVX2OR512VL-LABEL: PR31301:
1847; AVX2OR512VL:       # %bb.0: # %entry
1848; AVX2OR512VL-NEXT:    vpbroadcastb (%rdi), %xmm0
1849; AVX2OR512VL-NEXT:    vpbroadcastb (%rsi), %xmm1
1850; AVX2OR512VL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1851; AVX2OR512VL-NEXT:    retq
; The block label below is echoed in the expected "# %entry" comments above,
; so it must keep this name.
1852entry:
; Splat of the byte at %x: low 8 result lanes read lane 0, high 8 are undef.
1853  %0 = load i8, i8* %x, align 1
1854  %1 = insertelement <16 x i8> undef, i8 %0, i32 0
1855  %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Same splat pattern for the byte at %y.
1856  %2 = load i8, i8* %y, align 1
1857  %3 = insertelement <16 x i8> undef, i8 %2, i32 0
1858  %lane3 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Interleave the low 8 lanes of both splats: mask indices 0-7 select from
; %lane and indices 16-23 select lanes 0-7 of %lane3.
1859  %vzip.i = shufflevector <16 x i8> %lane, <16 x i8> %lane3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1860  ret <16 x i8> %vzip.i
1861}
1862