; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL

;
; Signed saturation truncation to vXi32
;

define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) {
; SSE2-LABEL: trunc_ssat_v4i64_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [2147483647,2147483647]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [4294967295,4294967295]
; SSE2-NEXT:    movdqa %xmm5, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm5, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSE2-NEXT:    pand %xmm7, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
; SSE2-NEXT:    por %xmm4, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm8, %xmm3
; SSE2-NEXT:    por %xmm0, %xmm3
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm5, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm5, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm8, %xmm4
; SSE2-NEXT:    por %xmm1, %xmm4
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
; SSE2-NEXT:    movdqa %xmm4, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [18446744069414584320,18446744069414584320]
; SSE2-NEXT:    movdqa %xmm0, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm5, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm7, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm6
; SSE2-NEXT:    pand %xmm6, %xmm4
; SSE2-NEXT:    pandn %xmm1, %xmm6
; SSE2-NEXT:    por %xmm4, %xmm6
; SSE2-NEXT:    pxor %xmm3, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm5, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm3
; SSE2-NEXT:    pandn %xmm1, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm6[0,2]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc_ssat_v4i64_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [2147483647,2147483647]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pxor %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [4294967295,4294967295]
; SSSE3-NEXT:    movdqa %xmm5, %xmm6
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm6
; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm3
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSSE3-NEXT:    pand %xmm7, %xmm4
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
; SSSE3-NEXT:    por %xmm4, %xmm3
; SSSE3-NEXT:    pand %xmm3, %xmm0
; SSSE3-NEXT:    pandn %xmm8, %xmm3
; SSSE3-NEXT:    por %xmm0, %xmm3
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    pxor %xmm2, %xmm0
; SSSE3-NEXT:    movdqa %xmm5, %xmm4
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm4
; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm6, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSSE3-NEXT:    por %xmm0, %xmm4
; SSSE3-NEXT:    pand %xmm4, %xmm1
; SSSE3-NEXT:    pandn %xmm8, %xmm4
; SSSE3-NEXT:    por %xmm1, %xmm4
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
; SSSE3-NEXT:    movdqa %xmm4, %xmm0
; SSSE3-NEXT:    pxor %xmm2, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [18446744069414584320,18446744069414584320]
; SSSE3-NEXT:    movdqa %xmm0, %xmm6
; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm6
; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm7, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSSE3-NEXT:    por %xmm0, %xmm6
; SSSE3-NEXT:    pand %xmm6, %xmm4
; SSSE3-NEXT:    pandn %xmm1, %xmm6
; SSSE3-NEXT:    por %xmm4, %xmm6
; SSSE3-NEXT:    pxor %xmm3, %xmm2
; SSSE3-NEXT:    movdqa %xmm2, %xmm0
; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSSE3-NEXT:    pand %xmm4, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    pand %xmm0, %xmm3
; SSSE3-NEXT:    pandn %xmm1, %xmm0
; SSSE3-NEXT:    por %xmm3, %xmm0
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm6[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_ssat_v4i64_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [2147483647,2147483647]
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,0,2147483648,0]
; SSE41-NEXT:    pxor %xmm3, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
; SSE41-NEXT:    movdqa %xmm8, %xmm5
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm7, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
; SSE41-NEXT:    por %xmm6, %xmm0
; SSE41-NEXT:    movapd %xmm4, %xmm5
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm5
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm3, %xmm0
; SSE41-NEXT:    movdqa %xmm8, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm6, %xmm7
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm7, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm4
; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
; SSE41-NEXT:    movapd %xmm4, %xmm0
; SSE41-NEXT:    xorpd %xmm3, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm8 = [18446744069414584320,18446744069414584320]
; SSE41-NEXT:    movapd %xmm0, %xmm6
; SSE41-NEXT:    pcmpgtd %xmm8, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm7, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    movapd %xmm1, %xmm2
; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT:    xorpd %xmm5, %xmm3
; SSE41-NEXT:    movapd %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd %xmm8, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm8, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm1
; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v4i64_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovapd {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2147483647,2147483647]
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovapd {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [18446744071562067968,18446744071562067968]
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: trunc_ssat_v4i64_v4i32:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
; AVX2-SLOW-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-SLOW-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
; AVX2-SLOW-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-SLOW-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-SLOW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-SLOW-NEXT:    vzeroupper
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: trunc_ssat_v4i64_v4i32:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
; AVX2-FAST-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-FAST-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
; AVX2-FAST-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-FAST-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    vmovapd {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-NEXT:    vzeroupper
; AVX2-FAST-NEXT:    retq
;
; AVX512F-LABEL: trunc_ssat_v4i64_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
; AVX512F-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
; AVX512F-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc_ssat_v4i64_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsqd %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc_ssat_v4i64_v4i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i32:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovsqd %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
; Clamp each i64 lane to [INT32_MIN, INT32_MAX] (smin then smax), then
; truncate the clamped values to i32 — i.e. a signed saturating truncation.
  %1 = icmp slt <4 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
  %3 = icmp sgt <4 x i64> %2, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
  %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
  %5 = trunc <4 x i64> %4 to <4 x i32>
  ret <4 x i32> %5
}


289define <8 x i32> @trunc_ssat_v8i64_v8i32(<8 x i64> %a0) {
290; SSE2-LABEL: trunc_ssat_v8i64_v8i32:
291; SSE2:       # %bb.0:
292; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [2147483647,2147483647]
293; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
294; SSE2-NEXT:    movdqa %xmm0, %xmm5
295; SSE2-NEXT:    pxor %xmm4, %xmm5
296; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [4294967295,4294967295]
297; SSE2-NEXT:    movdqa %xmm9, %xmm7
298; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
299; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
300; SSE2-NEXT:    pcmpeqd %xmm9, %xmm5
301; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
302; SSE2-NEXT:    pand %xmm10, %xmm6
303; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
304; SSE2-NEXT:    por %xmm6, %xmm5
305; SSE2-NEXT:    pand %xmm5, %xmm0
306; SSE2-NEXT:    pandn %xmm8, %xmm5
307; SSE2-NEXT:    por %xmm0, %xmm5
308; SSE2-NEXT:    movdqa %xmm1, %xmm0
309; SSE2-NEXT:    pxor %xmm4, %xmm0
310; SSE2-NEXT:    movdqa %xmm9, %xmm6
311; SSE2-NEXT:    pcmpgtd %xmm0, %xmm6
312; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
313; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
314; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
315; SSE2-NEXT:    pand %xmm10, %xmm7
316; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
317; SSE2-NEXT:    por %xmm7, %xmm0
318; SSE2-NEXT:    pand %xmm0, %xmm1
319; SSE2-NEXT:    pandn %xmm8, %xmm0
320; SSE2-NEXT:    por %xmm1, %xmm0
321; SSE2-NEXT:    movdqa %xmm2, %xmm1
322; SSE2-NEXT:    pxor %xmm4, %xmm1
323; SSE2-NEXT:    movdqa %xmm9, %xmm6
324; SSE2-NEXT:    pcmpgtd %xmm1, %xmm6
325; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
326; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
327; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
328; SSE2-NEXT:    pand %xmm7, %xmm1
329; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
330; SSE2-NEXT:    por %xmm1, %xmm7
331; SSE2-NEXT:    pand %xmm7, %xmm2
332; SSE2-NEXT:    pandn %xmm8, %xmm7
333; SSE2-NEXT:    por %xmm2, %xmm7
334; SSE2-NEXT:    movdqa %xmm3, %xmm1
335; SSE2-NEXT:    pxor %xmm4, %xmm1
336; SSE2-NEXT:    movdqa %xmm9, %xmm2
337; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
338; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
339; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
340; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
341; SSE2-NEXT:    pand %xmm6, %xmm1
342; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
343; SSE2-NEXT:    por %xmm1, %xmm6
344; SSE2-NEXT:    pand %xmm6, %xmm3
345; SSE2-NEXT:    pandn %xmm8, %xmm6
346; SSE2-NEXT:    por %xmm3, %xmm6
347; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [18446744071562067968,18446744071562067968]
348; SSE2-NEXT:    movdqa %xmm6, %xmm1
349; SSE2-NEXT:    pxor %xmm4, %xmm1
350; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [18446744069414584320,18446744069414584320]
351; SSE2-NEXT:    movdqa %xmm1, %xmm2
352; SSE2-NEXT:    pcmpgtd %xmm9, %xmm2
353; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
354; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
355; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
356; SSE2-NEXT:    pand %xmm3, %xmm1
357; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
358; SSE2-NEXT:    por %xmm1, %xmm2
359; SSE2-NEXT:    pand %xmm2, %xmm6
360; SSE2-NEXT:    pandn %xmm8, %xmm2
361; SSE2-NEXT:    por %xmm6, %xmm2
362; SSE2-NEXT:    movdqa %xmm7, %xmm1
363; SSE2-NEXT:    pxor %xmm4, %xmm1
364; SSE2-NEXT:    movdqa %xmm1, %xmm3
365; SSE2-NEXT:    pcmpgtd %xmm9, %xmm3
366; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm3[0,0,2,2]
367; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
368; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[1,1,3,3]
369; SSE2-NEXT:    pand %xmm10, %xmm6
370; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
371; SSE2-NEXT:    por %xmm6, %xmm1
372; SSE2-NEXT:    pand %xmm1, %xmm7
373; SSE2-NEXT:    pandn %xmm8, %xmm1
374; SSE2-NEXT:    por %xmm7, %xmm1
375; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
376; SSE2-NEXT:    movdqa %xmm0, %xmm2
377; SSE2-NEXT:    pxor %xmm4, %xmm2
378; SSE2-NEXT:    movdqa %xmm2, %xmm3
379; SSE2-NEXT:    pcmpgtd %xmm9, %xmm3
380; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
381; SSE2-NEXT:    pcmpeqd %xmm9, %xmm2
382; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
383; SSE2-NEXT:    pand %xmm6, %xmm2
384; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
385; SSE2-NEXT:    por %xmm2, %xmm3
386; SSE2-NEXT:    pand %xmm3, %xmm0
387; SSE2-NEXT:    pandn %xmm8, %xmm3
388; SSE2-NEXT:    por %xmm0, %xmm3
389; SSE2-NEXT:    pxor %xmm5, %xmm4
390; SSE2-NEXT:    movdqa %xmm4, %xmm0
391; SSE2-NEXT:    pcmpgtd %xmm9, %xmm0
392; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
393; SSE2-NEXT:    pcmpeqd %xmm9, %xmm4
394; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
395; SSE2-NEXT:    pand %xmm2, %xmm4
396; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
397; SSE2-NEXT:    por %xmm4, %xmm0
398; SSE2-NEXT:    pand %xmm0, %xmm5
399; SSE2-NEXT:    pandn %xmm8, %xmm0
400; SSE2-NEXT:    por %xmm5, %xmm0
401; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
402; SSE2-NEXT:    retq
403;
404; SSSE3-LABEL: trunc_ssat_v8i64_v8i32:
405; SSSE3:       # %bb.0:
406; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [2147483647,2147483647]
407; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
408; SSSE3-NEXT:    movdqa %xmm0, %xmm5
409; SSSE3-NEXT:    pxor %xmm4, %xmm5
410; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [4294967295,4294967295]
411; SSSE3-NEXT:    movdqa %xmm9, %xmm7
412; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm7
413; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
414; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm5
415; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
416; SSSE3-NEXT:    pand %xmm10, %xmm6
417; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
418; SSSE3-NEXT:    por %xmm6, %xmm5
419; SSSE3-NEXT:    pand %xmm5, %xmm0
420; SSSE3-NEXT:    pandn %xmm8, %xmm5
421; SSSE3-NEXT:    por %xmm0, %xmm5
422; SSSE3-NEXT:    movdqa %xmm1, %xmm0
423; SSSE3-NEXT:    pxor %xmm4, %xmm0
424; SSSE3-NEXT:    movdqa %xmm9, %xmm6
425; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm6
426; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
427; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
428; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
429; SSSE3-NEXT:    pand %xmm10, %xmm7
430; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
431; SSSE3-NEXT:    por %xmm7, %xmm0
432; SSSE3-NEXT:    pand %xmm0, %xmm1
433; SSSE3-NEXT:    pandn %xmm8, %xmm0
434; SSSE3-NEXT:    por %xmm1, %xmm0
435; SSSE3-NEXT:    movdqa %xmm2, %xmm1
436; SSSE3-NEXT:    pxor %xmm4, %xmm1
437; SSSE3-NEXT:    movdqa %xmm9, %xmm6
438; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm6
439; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
440; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
441; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
442; SSSE3-NEXT:    pand %xmm7, %xmm1
443; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
444; SSSE3-NEXT:    por %xmm1, %xmm7
445; SSSE3-NEXT:    pand %xmm7, %xmm2
446; SSSE3-NEXT:    pandn %xmm8, %xmm7
447; SSSE3-NEXT:    por %xmm2, %xmm7
448; SSSE3-NEXT:    movdqa %xmm3, %xmm1
449; SSSE3-NEXT:    pxor %xmm4, %xmm1
450; SSSE3-NEXT:    movdqa %xmm9, %xmm2
451; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
452; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
453; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
454; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
455; SSSE3-NEXT:    pand %xmm6, %xmm1
456; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
457; SSSE3-NEXT:    por %xmm1, %xmm6
458; SSSE3-NEXT:    pand %xmm6, %xmm3
459; SSSE3-NEXT:    pandn %xmm8, %xmm6
460; SSSE3-NEXT:    por %xmm3, %xmm6
461; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [18446744071562067968,18446744071562067968]
462; SSSE3-NEXT:    movdqa %xmm6, %xmm1
463; SSSE3-NEXT:    pxor %xmm4, %xmm1
464; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [18446744069414584320,18446744069414584320]
465; SSSE3-NEXT:    movdqa %xmm1, %xmm2
466; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm2
467; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
468; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
469; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
470; SSSE3-NEXT:    pand %xmm3, %xmm1
471; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
472; SSSE3-NEXT:    por %xmm1, %xmm2
473; SSSE3-NEXT:    pand %xmm2, %xmm6
474; SSSE3-NEXT:    pandn %xmm8, %xmm2
475; SSSE3-NEXT:    por %xmm6, %xmm2
476; SSSE3-NEXT:    movdqa %xmm7, %xmm1
477; SSSE3-NEXT:    pxor %xmm4, %xmm1
478; SSSE3-NEXT:    movdqa %xmm1, %xmm3
479; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm3
480; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm3[0,0,2,2]
481; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
482; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[1,1,3,3]
483; SSSE3-NEXT:    pand %xmm10, %xmm6
484; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
485; SSSE3-NEXT:    por %xmm6, %xmm1
486; SSSE3-NEXT:    pand %xmm1, %xmm7
487; SSSE3-NEXT:    pandn %xmm8, %xmm1
488; SSSE3-NEXT:    por %xmm7, %xmm1
489; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
490; SSSE3-NEXT:    movdqa %xmm0, %xmm2
491; SSSE3-NEXT:    pxor %xmm4, %xmm2
492; SSSE3-NEXT:    movdqa %xmm2, %xmm3
493; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm3
494; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
495; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm2
496; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
497; SSSE3-NEXT:    pand %xmm6, %xmm2
498; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
499; SSSE3-NEXT:    por %xmm2, %xmm3
500; SSSE3-NEXT:    pand %xmm3, %xmm0
501; SSSE3-NEXT:    pandn %xmm8, %xmm3
502; SSSE3-NEXT:    por %xmm0, %xmm3
503; SSSE3-NEXT:    pxor %xmm5, %xmm4
504; SSSE3-NEXT:    movdqa %xmm4, %xmm0
505; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm0
506; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
507; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm4
508; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
509; SSSE3-NEXT:    pand %xmm2, %xmm4
510; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
511; SSSE3-NEXT:    por %xmm4, %xmm0
512; SSSE3-NEXT:    pand %xmm0, %xmm5
513; SSSE3-NEXT:    pandn %xmm8, %xmm0
514; SSSE3-NEXT:    por %xmm5, %xmm0
515; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
516; SSSE3-NEXT:    retq
517;
518; SSE41-LABEL: trunc_ssat_v8i64_v8i32:
519; SSE41:       # %bb.0:
520; SSE41-NEXT:    movdqa %xmm0, %xmm9
521; SSE41-NEXT:    movapd {{.*#+}} xmm7 = [2147483647,2147483647]
522; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [2147483648,0,2147483648,0]
523; SSE41-NEXT:    pxor %xmm5, %xmm0
524; SSE41-NEXT:    movdqa {{.*#+}} xmm10 = [4294967295,4294967295]
525; SSE41-NEXT:    movdqa %xmm10, %xmm6
526; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
527; SSE41-NEXT:    pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
528; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
529; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
530; SSE41-NEXT:    pand %xmm8, %xmm4
531; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
532; SSE41-NEXT:    por %xmm4, %xmm0
533; SSE41-NEXT:    movapd %xmm7, %xmm8
534; SSE41-NEXT:    blendvpd %xmm0, %xmm9, %xmm8
535; SSE41-NEXT:    movdqa %xmm1, %xmm0
536; SSE41-NEXT:    pxor %xmm5, %xmm0
537; SSE41-NEXT:    movdqa %xmm10, %xmm4
538; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
539; SSE41-NEXT:    pshufd {{.*#+}} xmm9 = xmm4[0,0,2,2]
540; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
541; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
542; SSE41-NEXT:    pand %xmm9, %xmm6
543; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
544; SSE41-NEXT:    por %xmm6, %xmm0
545; SSE41-NEXT:    movapd %xmm7, %xmm9
546; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm9
547; SSE41-NEXT:    movdqa %xmm2, %xmm0
548; SSE41-NEXT:    pxor %xmm5, %xmm0
549; SSE41-NEXT:    movdqa %xmm10, %xmm1
550; SSE41-NEXT:    pcmpgtd %xmm0, %xmm1
551; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
552; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
553; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
554; SSE41-NEXT:    pand %xmm4, %xmm6
555; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
556; SSE41-NEXT:    por %xmm6, %xmm0
557; SSE41-NEXT:    movapd %xmm7, %xmm4
558; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm4
559; SSE41-NEXT:    movdqa %xmm3, %xmm0
560; SSE41-NEXT:    pxor %xmm5, %xmm0
561; SSE41-NEXT:    movdqa %xmm10, %xmm1
562; SSE41-NEXT:    pcmpgtd %xmm0, %xmm1
563; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
564; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
565; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
566; SSE41-NEXT:    pand %xmm2, %xmm6
567; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
568; SSE41-NEXT:    por %xmm6, %xmm0
569; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm7
570; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968]
571; SSE41-NEXT:    movapd %xmm7, %xmm0
572; SSE41-NEXT:    xorpd %xmm5, %xmm0
573; SSE41-NEXT:    movdqa {{.*#+}} xmm10 = [18446744069414584320,18446744069414584320]
574; SSE41-NEXT:    movapd %xmm0, %xmm1
575; SSE41-NEXT:    pcmpgtd %xmm10, %xmm1
576; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
577; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
578; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
579; SSE41-NEXT:    pand %xmm6, %xmm3
580; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
581; SSE41-NEXT:    por %xmm3, %xmm0
582; SSE41-NEXT:    movapd %xmm2, %xmm3
583; SSE41-NEXT:    blendvpd %xmm0, %xmm7, %xmm3
584; SSE41-NEXT:    movapd %xmm4, %xmm0
585; SSE41-NEXT:    xorpd %xmm5, %xmm0
586; SSE41-NEXT:    movapd %xmm0, %xmm1
587; SSE41-NEXT:    pcmpgtd %xmm10, %xmm1
588; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
589; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
590; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
591; SSE41-NEXT:    pand %xmm6, %xmm7
592; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
593; SSE41-NEXT:    por %xmm7, %xmm0
594; SSE41-NEXT:    movapd %xmm2, %xmm1
595; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm1
596; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
597; SSE41-NEXT:    movapd %xmm9, %xmm0
598; SSE41-NEXT:    xorpd %xmm5, %xmm0
599; SSE41-NEXT:    movapd %xmm0, %xmm3
600; SSE41-NEXT:    pcmpgtd %xmm10, %xmm3
601; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
602; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
603; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
604; SSE41-NEXT:    pand %xmm4, %xmm6
605; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
606; SSE41-NEXT:    por %xmm6, %xmm0
607; SSE41-NEXT:    movapd %xmm2, %xmm3
608; SSE41-NEXT:    blendvpd %xmm0, %xmm9, %xmm3
609; SSE41-NEXT:    xorpd %xmm8, %xmm5
610; SSE41-NEXT:    movapd %xmm5, %xmm0
611; SSE41-NEXT:    pcmpgtd %xmm10, %xmm0
612; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
613; SSE41-NEXT:    pcmpeqd %xmm10, %xmm5
614; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
615; SSE41-NEXT:    pand %xmm4, %xmm5
616; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
617; SSE41-NEXT:    por %xmm5, %xmm0
618; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm2
619; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
620; SSE41-NEXT:    movaps %xmm2, %xmm0
621; SSE41-NEXT:    retq
622;
623; AVX1-LABEL: trunc_ssat_v8i64_v8i32:
624; AVX1:       # %bb.0:
625; AVX1-NEXT:    vmovapd {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647]
626; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
627; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [2147483647,2147483647]
628; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
629; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm5
630; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm5, %ymm3
631; AVX1-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
632; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
633; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
634; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm4, %xmm4
635; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
636; AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
637; AVX1-NEXT:    vmovapd {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
638; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
639; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744071562067968,18446744071562067968]
640; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
641; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
642; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm5, %ymm3
643; AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
644; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
645; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
646; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm4
647; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
648; AVX1-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
649; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
650; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
651; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
652; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
653; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
654; AVX1-NEXT:    retq
655;
656; AVX2-SLOW-LABEL: trunc_ssat_v8i64_v8i32:
657; AVX2-SLOW:       # %bb.0:
658; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647]
659; AVX2-SLOW-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
660; AVX2-SLOW-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
661; AVX2-SLOW-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
662; AVX2-SLOW-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
663; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
664; AVX2-SLOW-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
665; AVX2-SLOW-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
666; AVX2-SLOW-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
667; AVX2-SLOW-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
668; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
669; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
670; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
671; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
672; AVX2-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
673; AVX2-SLOW-NEXT:    retq
674;
675; AVX2-FAST-LABEL: trunc_ssat_v8i64_v8i32:
676; AVX2-FAST:       # %bb.0:
677; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647]
678; AVX2-FAST-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
679; AVX2-FAST-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
680; AVX2-FAST-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
681; AVX2-FAST-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
682; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
683; AVX2-FAST-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
684; AVX2-FAST-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
685; AVX2-FAST-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
686; AVX2-FAST-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
687; AVX2-FAST-NEXT:    vmovapd {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
688; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm2, %ymm0
689; AVX2-FAST-NEXT:    vpermps %ymm1, %ymm2, %ymm1
690; AVX2-FAST-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
691; AVX2-FAST-NEXT:    retq
692;
693; AVX512-LABEL: trunc_ssat_v8i64_v8i32:
694; AVX512:       # %bb.0:
695; AVX512-NEXT:    vpmovsqd %zmm0, %ymm0
696; AVX512-NEXT:    retq
697  %1 = icmp slt <8 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
698  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
699  %3 = icmp sgt <8 x i64> %2, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
700  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
701  %5 = trunc <8 x i64> %4 to <8 x i32>
702  ret <8 x i32> %5
703}
704
705;
706; Signed saturation truncation to vXi16
707;
708
709define <8 x i16> @trunc_ssat_v8i64_v8i16(<8 x i64> %a0) {
710; SSE2-LABEL: trunc_ssat_v8i64_v8i16:
711; SSE2:       # %bb.0:
712; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [32767,32767]
713; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
714; SSE2-NEXT:    movdqa %xmm2, %xmm5
715; SSE2-NEXT:    pxor %xmm4, %xmm5
716; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [2147516415,2147516415]
717; SSE2-NEXT:    movdqa %xmm9, %xmm7
718; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
719; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
720; SSE2-NEXT:    pcmpeqd %xmm9, %xmm5
721; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
722; SSE2-NEXT:    pand %xmm10, %xmm6
723; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
724; SSE2-NEXT:    por %xmm6, %xmm5
725; SSE2-NEXT:    pand %xmm5, %xmm2
726; SSE2-NEXT:    pandn %xmm8, %xmm5
727; SSE2-NEXT:    por %xmm2, %xmm5
728; SSE2-NEXT:    movdqa %xmm3, %xmm2
729; SSE2-NEXT:    pxor %xmm4, %xmm2
730; SSE2-NEXT:    movdqa %xmm9, %xmm6
731; SSE2-NEXT:    pcmpgtd %xmm2, %xmm6
732; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
733; SSE2-NEXT:    pcmpeqd %xmm9, %xmm2
734; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
735; SSE2-NEXT:    pand %xmm10, %xmm7
736; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
737; SSE2-NEXT:    por %xmm7, %xmm2
738; SSE2-NEXT:    pand %xmm2, %xmm3
739; SSE2-NEXT:    pandn %xmm8, %xmm2
740; SSE2-NEXT:    por %xmm3, %xmm2
741; SSE2-NEXT:    movdqa %xmm0, %xmm3
742; SSE2-NEXT:    pxor %xmm4, %xmm3
743; SSE2-NEXT:    movdqa %xmm9, %xmm6
744; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
745; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
746; SSE2-NEXT:    pcmpeqd %xmm9, %xmm3
747; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
748; SSE2-NEXT:    pand %xmm10, %xmm7
749; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
750; SSE2-NEXT:    por %xmm7, %xmm3
751; SSE2-NEXT:    pand %xmm3, %xmm0
752; SSE2-NEXT:    pandn %xmm8, %xmm3
753; SSE2-NEXT:    por %xmm0, %xmm3
754; SSE2-NEXT:    movdqa %xmm1, %xmm0
755; SSE2-NEXT:    pxor %xmm4, %xmm0
756; SSE2-NEXT:    movdqa %xmm9, %xmm6
757; SSE2-NEXT:    pcmpgtd %xmm0, %xmm6
758; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
759; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
760; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
761; SSE2-NEXT:    pand %xmm7, %xmm0
762; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
763; SSE2-NEXT:    por %xmm0, %xmm7
764; SSE2-NEXT:    pand %xmm7, %xmm1
765; SSE2-NEXT:    pandn %xmm8, %xmm7
766; SSE2-NEXT:    por %xmm1, %xmm7
767; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709518848,18446744073709518848]
768; SSE2-NEXT:    movdqa %xmm7, %xmm0
769; SSE2-NEXT:    pxor %xmm4, %xmm0
770; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562035200,18446744071562035200]
771; SSE2-NEXT:    movdqa %xmm0, %xmm1
772; SSE2-NEXT:    pcmpgtd %xmm9, %xmm1
773; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
774; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
775; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
776; SSE2-NEXT:    pand %xmm6, %xmm0
777; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
778; SSE2-NEXT:    por %xmm0, %xmm1
779; SSE2-NEXT:    pand %xmm1, %xmm7
780; SSE2-NEXT:    pandn %xmm8, %xmm1
781; SSE2-NEXT:    por %xmm7, %xmm1
782; SSE2-NEXT:    movdqa %xmm3, %xmm0
783; SSE2-NEXT:    pxor %xmm4, %xmm0
784; SSE2-NEXT:    movdqa %xmm0, %xmm6
785; SSE2-NEXT:    pcmpgtd %xmm9, %xmm6
786; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
787; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
788; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
789; SSE2-NEXT:    pand %xmm10, %xmm7
790; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
791; SSE2-NEXT:    por %xmm7, %xmm0
792; SSE2-NEXT:    pand %xmm0, %xmm3
793; SSE2-NEXT:    pandn %xmm8, %xmm0
794; SSE2-NEXT:    por %xmm3, %xmm0
795; SSE2-NEXT:    packssdw %xmm1, %xmm0
796; SSE2-NEXT:    movdqa %xmm2, %xmm1
797; SSE2-NEXT:    pxor %xmm4, %xmm1
798; SSE2-NEXT:    movdqa %xmm1, %xmm3
799; SSE2-NEXT:    pcmpgtd %xmm9, %xmm3
800; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
801; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
802; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
803; SSE2-NEXT:    pand %xmm6, %xmm1
804; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
805; SSE2-NEXT:    por %xmm1, %xmm3
806; SSE2-NEXT:    pand %xmm3, %xmm2
807; SSE2-NEXT:    pandn %xmm8, %xmm3
808; SSE2-NEXT:    por %xmm2, %xmm3
809; SSE2-NEXT:    pxor %xmm5, %xmm4
810; SSE2-NEXT:    movdqa %xmm4, %xmm1
811; SSE2-NEXT:    pcmpgtd %xmm9, %xmm1
812; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
813; SSE2-NEXT:    pcmpeqd %xmm9, %xmm4
814; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
815; SSE2-NEXT:    pand %xmm2, %xmm4
816; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
817; SSE2-NEXT:    por %xmm4, %xmm1
818; SSE2-NEXT:    pand %xmm1, %xmm5
819; SSE2-NEXT:    pandn %xmm8, %xmm1
820; SSE2-NEXT:    por %xmm5, %xmm1
821; SSE2-NEXT:    packssdw %xmm3, %xmm1
822; SSE2-NEXT:    packssdw %xmm1, %xmm0
823; SSE2-NEXT:    retq
824;
825; SSSE3-LABEL: trunc_ssat_v8i64_v8i16:
826; SSSE3:       # %bb.0:
827; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [32767,32767]
828; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
829; SSSE3-NEXT:    movdqa %xmm2, %xmm5
830; SSSE3-NEXT:    pxor %xmm4, %xmm5
831; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [2147516415,2147516415]
832; SSSE3-NEXT:    movdqa %xmm9, %xmm7
833; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm7
834; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
835; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm5
836; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
837; SSSE3-NEXT:    pand %xmm10, %xmm6
838; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
839; SSSE3-NEXT:    por %xmm6, %xmm5
840; SSSE3-NEXT:    pand %xmm5, %xmm2
841; SSSE3-NEXT:    pandn %xmm8, %xmm5
842; SSSE3-NEXT:    por %xmm2, %xmm5
843; SSSE3-NEXT:    movdqa %xmm3, %xmm2
844; SSSE3-NEXT:    pxor %xmm4, %xmm2
845; SSSE3-NEXT:    movdqa %xmm9, %xmm6
846; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm6
847; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
848; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm2
849; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
850; SSSE3-NEXT:    pand %xmm10, %xmm7
851; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
852; SSSE3-NEXT:    por %xmm7, %xmm2
853; SSSE3-NEXT:    pand %xmm2, %xmm3
854; SSSE3-NEXT:    pandn %xmm8, %xmm2
855; SSSE3-NEXT:    por %xmm3, %xmm2
856; SSSE3-NEXT:    movdqa %xmm0, %xmm3
857; SSSE3-NEXT:    pxor %xmm4, %xmm3
858; SSSE3-NEXT:    movdqa %xmm9, %xmm6
859; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm6
860; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
861; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm3
862; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
863; SSSE3-NEXT:    pand %xmm10, %xmm7
864; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
865; SSSE3-NEXT:    por %xmm7, %xmm3
866; SSSE3-NEXT:    pand %xmm3, %xmm0
867; SSSE3-NEXT:    pandn %xmm8, %xmm3
868; SSSE3-NEXT:    por %xmm0, %xmm3
869; SSSE3-NEXT:    movdqa %xmm1, %xmm0
870; SSSE3-NEXT:    pxor %xmm4, %xmm0
871; SSSE3-NEXT:    movdqa %xmm9, %xmm6
872; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm6
873; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
874; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
875; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
876; SSSE3-NEXT:    pand %xmm7, %xmm0
877; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
878; SSSE3-NEXT:    por %xmm0, %xmm7
879; SSSE3-NEXT:    pand %xmm7, %xmm1
880; SSSE3-NEXT:    pandn %xmm8, %xmm7
881; SSSE3-NEXT:    por %xmm1, %xmm7
882; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709518848,18446744073709518848]
883; SSSE3-NEXT:    movdqa %xmm7, %xmm0
884; SSSE3-NEXT:    pxor %xmm4, %xmm0
885; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562035200,18446744071562035200]
886; SSSE3-NEXT:    movdqa %xmm0, %xmm1
887; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm1
888; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
889; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
890; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
891; SSSE3-NEXT:    pand %xmm6, %xmm0
892; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
893; SSSE3-NEXT:    por %xmm0, %xmm1
894; SSSE3-NEXT:    pand %xmm1, %xmm7
895; SSSE3-NEXT:    pandn %xmm8, %xmm1
896; SSSE3-NEXT:    por %xmm7, %xmm1
897; SSSE3-NEXT:    movdqa %xmm3, %xmm0
898; SSSE3-NEXT:    pxor %xmm4, %xmm0
899; SSSE3-NEXT:    movdqa %xmm0, %xmm6
900; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm6
901; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
902; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
903; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
904; SSSE3-NEXT:    pand %xmm10, %xmm7
905; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
906; SSSE3-NEXT:    por %xmm7, %xmm0
907; SSSE3-NEXT:    pand %xmm0, %xmm3
908; SSSE3-NEXT:    pandn %xmm8, %xmm0
909; SSSE3-NEXT:    por %xmm3, %xmm0
910; SSSE3-NEXT:    packssdw %xmm1, %xmm0
911; SSSE3-NEXT:    movdqa %xmm2, %xmm1
912; SSSE3-NEXT:    pxor %xmm4, %xmm1
913; SSSE3-NEXT:    movdqa %xmm1, %xmm3
914; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm3
915; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
916; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
917; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
918; SSSE3-NEXT:    pand %xmm6, %xmm1
919; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
920; SSSE3-NEXT:    por %xmm1, %xmm3
921; SSSE3-NEXT:    pand %xmm3, %xmm2
922; SSSE3-NEXT:    pandn %xmm8, %xmm3
923; SSSE3-NEXT:    por %xmm2, %xmm3
924; SSSE3-NEXT:    pxor %xmm5, %xmm4
925; SSSE3-NEXT:    movdqa %xmm4, %xmm1
926; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm1
927; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
928; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm4
929; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
930; SSSE3-NEXT:    pand %xmm2, %xmm4
931; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
932; SSSE3-NEXT:    por %xmm4, %xmm1
933; SSSE3-NEXT:    pand %xmm1, %xmm5
934; SSSE3-NEXT:    pandn %xmm8, %xmm1
935; SSSE3-NEXT:    por %xmm5, %xmm1
936; SSSE3-NEXT:    packssdw %xmm3, %xmm1
937; SSSE3-NEXT:    packssdw %xmm1, %xmm0
938; SSSE3-NEXT:    retq
939;
940; SSE41-LABEL: trunc_ssat_v8i64_v8i16:
941; SSE41:       # %bb.0:
942; SSE41-NEXT:    movdqa %xmm0, %xmm10
943; SSE41-NEXT:    movapd {{.*#+}} xmm7 = [32767,32767]
944; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [2147483648,0,2147483648,0]
945; SSE41-NEXT:    movdqa %xmm2, %xmm0
946; SSE41-NEXT:    pxor %xmm5, %xmm0
947; SSE41-NEXT:    movdqa {{.*#+}} xmm11 = [2147516415,2147516415]
948; SSE41-NEXT:    movdqa %xmm11, %xmm6
949; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
950; SSE41-NEXT:    pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
951; SSE41-NEXT:    pcmpeqd %xmm11, %xmm0
952; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
953; SSE41-NEXT:    pand %xmm8, %xmm4
954; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
955; SSE41-NEXT:    por %xmm4, %xmm0
956; SSE41-NEXT:    movapd %xmm7, %xmm8
957; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm8
958; SSE41-NEXT:    movdqa %xmm3, %xmm0
959; SSE41-NEXT:    pxor %xmm5, %xmm0
960; SSE41-NEXT:    movdqa %xmm11, %xmm2
961; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
962; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
963; SSE41-NEXT:    pcmpeqd %xmm11, %xmm0
964; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
965; SSE41-NEXT:    pand %xmm4, %xmm6
966; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
967; SSE41-NEXT:    por %xmm6, %xmm0
968; SSE41-NEXT:    movapd %xmm7, %xmm9
969; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm9
970; SSE41-NEXT:    movdqa %xmm10, %xmm0
971; SSE41-NEXT:    pxor %xmm5, %xmm0
972; SSE41-NEXT:    movdqa %xmm11, %xmm2
973; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
974; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
975; SSE41-NEXT:    pcmpeqd %xmm11, %xmm0
976; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
977; SSE41-NEXT:    pand %xmm3, %xmm4
978; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
979; SSE41-NEXT:    por %xmm4, %xmm0
980; SSE41-NEXT:    movapd %xmm7, %xmm2
981; SSE41-NEXT:    blendvpd %xmm0, %xmm10, %xmm2
982; SSE41-NEXT:    movdqa %xmm1, %xmm0
983; SSE41-NEXT:    pxor %xmm5, %xmm0
984; SSE41-NEXT:    movdqa %xmm11, %xmm3
985; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
986; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
987; SSE41-NEXT:    pcmpeqd %xmm11, %xmm0
988; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
989; SSE41-NEXT:    pand %xmm4, %xmm6
990; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
991; SSE41-NEXT:    por %xmm6, %xmm0
992; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm7
993; SSE41-NEXT:    movapd {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848]
994; SSE41-NEXT:    movapd %xmm7, %xmm0
995; SSE41-NEXT:    xorpd %xmm5, %xmm0
996; SSE41-NEXT:    movdqa {{.*#+}} xmm10 = [18446744071562035200,18446744071562035200]
997; SSE41-NEXT:    movapd %xmm0, %xmm1
998; SSE41-NEXT:    pcmpgtd %xmm10, %xmm1
999; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1000; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
1001; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1002; SSE41-NEXT:    pand %xmm6, %xmm4
1003; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1004; SSE41-NEXT:    por %xmm4, %xmm0
1005; SSE41-NEXT:    movapd %xmm3, %xmm4
1006; SSE41-NEXT:    blendvpd %xmm0, %xmm7, %xmm4
1007; SSE41-NEXT:    movapd %xmm2, %xmm0
1008; SSE41-NEXT:    xorpd %xmm5, %xmm0
1009; SSE41-NEXT:    movapd %xmm0, %xmm1
1010; SSE41-NEXT:    pcmpgtd %xmm10, %xmm1
1011; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1012; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
1013; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1014; SSE41-NEXT:    pand %xmm6, %xmm7
1015; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1016; SSE41-NEXT:    por %xmm7, %xmm0
1017; SSE41-NEXT:    movapd %xmm3, %xmm1
1018; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
1019; SSE41-NEXT:    packssdw %xmm4, %xmm1
1020; SSE41-NEXT:    movapd %xmm9, %xmm0
1021; SSE41-NEXT:    xorpd %xmm5, %xmm0
1022; SSE41-NEXT:    movapd %xmm0, %xmm2
1023; SSE41-NEXT:    pcmpgtd %xmm10, %xmm2
1024; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1025; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
1026; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1027; SSE41-NEXT:    pand %xmm4, %xmm6
1028; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1029; SSE41-NEXT:    por %xmm6, %xmm0
1030; SSE41-NEXT:    movapd %xmm3, %xmm2
1031; SSE41-NEXT:    blendvpd %xmm0, %xmm9, %xmm2
1032; SSE41-NEXT:    xorpd %xmm8, %xmm5
1033; SSE41-NEXT:    movapd %xmm5, %xmm0
1034; SSE41-NEXT:    pcmpgtd %xmm10, %xmm0
1035; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
1036; SSE41-NEXT:    pcmpeqd %xmm10, %xmm5
1037; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1038; SSE41-NEXT:    pand %xmm4, %xmm5
1039; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1040; SSE41-NEXT:    por %xmm5, %xmm0
1041; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm3
1042; SSE41-NEXT:    packssdw %xmm2, %xmm3
1043; SSE41-NEXT:    packssdw %xmm3, %xmm1
1044; SSE41-NEXT:    movdqa %xmm1, %xmm0
1045; SSE41-NEXT:    retq
1046;
1047; AVX1-LABEL: trunc_ssat_v8i64_v8i16:
1048; AVX1:       # %bb.0:
1049; AVX1-NEXT:    vmovapd {{.*#+}} ymm2 = [32767,32767,32767,32767]
1050; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
1051; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [32767,32767]
1052; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
1053; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm5
1054; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm5, %ymm3
1055; AVX1-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1056; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1057; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
1058; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm4, %xmm4
1059; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
1060; AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1061; AVX1-NEXT:    vmovapd {{.*#+}} ymm2 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848]
1062; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1063; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744073709518848,18446744073709518848]
1064; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
1065; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
1066; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm5, %ymm3
1067; AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1068; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
1069; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
1070; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm4
1071; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
1072; AVX1-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1073; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1074; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
1075; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1076; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
1077; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1078; AVX1-NEXT:    vzeroupper
1079; AVX1-NEXT:    retq
1080;
1081; AVX2-LABEL: trunc_ssat_v8i64_v8i16:
1082; AVX2:       # %bb.0:
1083; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [32767,32767,32767,32767]
1084; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
1085; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1086; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
1087; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1088; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848]
1089; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
1090; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1091; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
1092; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1093; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
1094; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1095; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1096; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1097; AVX2-NEXT:    vzeroupper
1098; AVX2-NEXT:    retq
1099;
1100; AVX512-LABEL: trunc_ssat_v8i64_v8i16:
1101; AVX512:       # %bb.0:
1102; AVX512-NEXT:    vpmovsqw %zmm0, %xmm0
1103; AVX512-NEXT:    vzeroupper
1104; AVX512-NEXT:    retq
1105  %1 = icmp slt <8 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767>
1106  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767>
1107  %3 = icmp sgt <8 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768>
1108  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768>
1109  %5 = trunc <8 x i64> %4 to <8 x i16>
1110  ret <8 x i16> %5
1111}
1112
; Signed saturating truncation of <8 x i32> to <8 x i16>: clamp each lane to
; [-32768, 32767], then truncate. On SSE/AVX targets this should lower to
; packssdw, which performs the signed-saturating pack directly.
; NOTE(review) - the FileCheck assertion lines below are autogenerated by
; utils/update_llc_test_checks.py; regenerate rather than hand-editing them.
define <8 x i16> @trunc_ssat_v8i32_v8i16(<8 x i32> %a0) {
; SSE-LABEL: trunc_ssat_v8i32_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v8i32_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v8i32_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_ssat_v8i32_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512F-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528]
; AVX512F-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc_ssat_v8i32_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc_ssat_v8i32_v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512BW-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528]
; AVX512BW-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i16:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovsdw %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
; IR form of min(max(%a0, -32768), 32767) followed by trunc to i16 lanes.
  %1 = icmp slt <8 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %3 = icmp sgt <8 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %5 = trunc <8 x i32> %4 to <8 x i16>
  ret <8 x i16> %5
}
1173
; Signed saturating truncation of <16 x i32> to <16 x i16>: clamp each lane to
; [-32768, 32767], then truncate. Expected lowering is packssdw pairs on
; SSE/AVX and a single vpmovsdw on AVX-512 targets.
; NOTE(review) - the FileCheck assertion lines below are autogenerated by
; utils/update_llc_test_checks.py; regenerate rather than hand-editing them.
define <16 x i16> @trunc_ssat_v16i32_v16i16(<16 x i32> %a0) {
; SSE-LABEL: trunc_ssat_v16i32_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v16i32_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v16i32_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_ssat_v16i32_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsdw %zmm0, %ymm0
; AVX512-NEXT:    retq
; IR form of min(max(%a0, -32768), 32767) followed by trunc to i16 lanes.
  %1 = icmp slt <16 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %3 = icmp sgt <16 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %5 = trunc <16 x i32> %4 to <16 x i16>
  ret <16 x i16> %5
}
1208
1209;
1210; Signed saturation truncation to v16i8
1211;
1212
1213define <8 x i8> @trunc_ssat_v8i64_v8i8(<8 x i64> %a0) {
1214; SSE2-LABEL: trunc_ssat_v8i64_v8i8:
1215; SSE2:       # %bb.0:
1216; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
1217; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
1218; SSE2-NEXT:    movdqa %xmm2, %xmm5
1219; SSE2-NEXT:    pxor %xmm4, %xmm5
1220; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [2147483775,2147483775]
1221; SSE2-NEXT:    movdqa %xmm9, %xmm7
1222; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
1223; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
1224; SSE2-NEXT:    pcmpeqd %xmm9, %xmm5
1225; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
1226; SSE2-NEXT:    pand %xmm10, %xmm6
1227; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1228; SSE2-NEXT:    por %xmm6, %xmm5
1229; SSE2-NEXT:    pand %xmm5, %xmm2
1230; SSE2-NEXT:    pandn %xmm8, %xmm5
1231; SSE2-NEXT:    por %xmm2, %xmm5
1232; SSE2-NEXT:    movdqa %xmm3, %xmm2
1233; SSE2-NEXT:    pxor %xmm4, %xmm2
1234; SSE2-NEXT:    movdqa %xmm9, %xmm6
1235; SSE2-NEXT:    pcmpgtd %xmm2, %xmm6
1236; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
1237; SSE2-NEXT:    pcmpeqd %xmm9, %xmm2
1238; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
1239; SSE2-NEXT:    pand %xmm10, %xmm7
1240; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
1241; SSE2-NEXT:    por %xmm7, %xmm2
1242; SSE2-NEXT:    pand %xmm2, %xmm3
1243; SSE2-NEXT:    pandn %xmm8, %xmm2
1244; SSE2-NEXT:    por %xmm3, %xmm2
1245; SSE2-NEXT:    movdqa %xmm0, %xmm3
1246; SSE2-NEXT:    pxor %xmm4, %xmm3
1247; SSE2-NEXT:    movdqa %xmm9, %xmm6
1248; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
1249; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
1250; SSE2-NEXT:    pcmpeqd %xmm9, %xmm3
1251; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
1252; SSE2-NEXT:    pand %xmm10, %xmm7
1253; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
1254; SSE2-NEXT:    por %xmm7, %xmm3
1255; SSE2-NEXT:    pand %xmm3, %xmm0
1256; SSE2-NEXT:    pandn %xmm8, %xmm3
1257; SSE2-NEXT:    por %xmm0, %xmm3
1258; SSE2-NEXT:    movdqa %xmm1, %xmm0
1259; SSE2-NEXT:    pxor %xmm4, %xmm0
1260; SSE2-NEXT:    movdqa %xmm9, %xmm6
1261; SSE2-NEXT:    pcmpgtd %xmm0, %xmm6
1262; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1263; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
1264; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1265; SSE2-NEXT:    pand %xmm7, %xmm0
1266; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
1267; SSE2-NEXT:    por %xmm0, %xmm7
1268; SSE2-NEXT:    pand %xmm7, %xmm1
1269; SSE2-NEXT:    pandn %xmm8, %xmm7
1270; SSE2-NEXT:    por %xmm1, %xmm7
1271; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
1272; SSE2-NEXT:    movdqa %xmm7, %xmm0
1273; SSE2-NEXT:    pxor %xmm4, %xmm0
1274; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840]
1275; SSE2-NEXT:    movdqa %xmm0, %xmm1
1276; SSE2-NEXT:    pcmpgtd %xmm9, %xmm1
1277; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1278; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
1279; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1280; SSE2-NEXT:    pand %xmm6, %xmm0
1281; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1282; SSE2-NEXT:    por %xmm0, %xmm1
1283; SSE2-NEXT:    pand %xmm1, %xmm7
1284; SSE2-NEXT:    pandn %xmm8, %xmm1
1285; SSE2-NEXT:    por %xmm7, %xmm1
1286; SSE2-NEXT:    movdqa %xmm3, %xmm0
1287; SSE2-NEXT:    pxor %xmm4, %xmm0
1288; SSE2-NEXT:    movdqa %xmm0, %xmm6
1289; SSE2-NEXT:    pcmpgtd %xmm9, %xmm6
1290; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
1291; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
1292; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1293; SSE2-NEXT:    pand %xmm10, %xmm7
1294; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
1295; SSE2-NEXT:    por %xmm7, %xmm0
1296; SSE2-NEXT:    pand %xmm0, %xmm3
1297; SSE2-NEXT:    pandn %xmm8, %xmm0
1298; SSE2-NEXT:    por %xmm3, %xmm0
1299; SSE2-NEXT:    packssdw %xmm1, %xmm0
1300; SSE2-NEXT:    movdqa %xmm2, %xmm1
1301; SSE2-NEXT:    pxor %xmm4, %xmm1
1302; SSE2-NEXT:    movdqa %xmm1, %xmm3
1303; SSE2-NEXT:    pcmpgtd %xmm9, %xmm3
1304; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
1305; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
1306; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1307; SSE2-NEXT:    pand %xmm6, %xmm1
1308; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1309; SSE2-NEXT:    por %xmm1, %xmm3
1310; SSE2-NEXT:    pand %xmm3, %xmm2
1311; SSE2-NEXT:    pandn %xmm8, %xmm3
1312; SSE2-NEXT:    por %xmm2, %xmm3
1313; SSE2-NEXT:    pxor %xmm5, %xmm4
1314; SSE2-NEXT:    movdqa %xmm4, %xmm1
1315; SSE2-NEXT:    pcmpgtd %xmm9, %xmm1
1316; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
1317; SSE2-NEXT:    pcmpeqd %xmm9, %xmm4
1318; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1319; SSE2-NEXT:    pand %xmm2, %xmm4
1320; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1321; SSE2-NEXT:    por %xmm4, %xmm1
1322; SSE2-NEXT:    pand %xmm1, %xmm5
1323; SSE2-NEXT:    pandn %xmm8, %xmm1
1324; SSE2-NEXT:    por %xmm5, %xmm1
1325; SSE2-NEXT:    packssdw %xmm3, %xmm1
1326; SSE2-NEXT:    packssdw %xmm1, %xmm0
1327; SSE2-NEXT:    retq
1328;
1329; SSSE3-LABEL: trunc_ssat_v8i64_v8i8:
1330; SSSE3:       # %bb.0:
1331; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
1332; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
1333; SSSE3-NEXT:    movdqa %xmm2, %xmm5
1334; SSSE3-NEXT:    pxor %xmm4, %xmm5
1335; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [2147483775,2147483775]
1336; SSSE3-NEXT:    movdqa %xmm9, %xmm7
1337; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm7
1338; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
1339; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm5
1340; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
1341; SSSE3-NEXT:    pand %xmm10, %xmm6
1342; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1343; SSSE3-NEXT:    por %xmm6, %xmm5
1344; SSSE3-NEXT:    pand %xmm5, %xmm2
1345; SSSE3-NEXT:    pandn %xmm8, %xmm5
1346; SSSE3-NEXT:    por %xmm2, %xmm5
1347; SSSE3-NEXT:    movdqa %xmm3, %xmm2
1348; SSSE3-NEXT:    pxor %xmm4, %xmm2
1349; SSSE3-NEXT:    movdqa %xmm9, %xmm6
1350; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm6
1351; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
1352; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm2
1353; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
1354; SSSE3-NEXT:    pand %xmm10, %xmm7
1355; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
1356; SSSE3-NEXT:    por %xmm7, %xmm2
1357; SSSE3-NEXT:    pand %xmm2, %xmm3
1358; SSSE3-NEXT:    pandn %xmm8, %xmm2
1359; SSSE3-NEXT:    por %xmm3, %xmm2
1360; SSSE3-NEXT:    movdqa %xmm0, %xmm3
1361; SSSE3-NEXT:    pxor %xmm4, %xmm3
1362; SSSE3-NEXT:    movdqa %xmm9, %xmm6
1363; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm6
1364; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
1365; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm3
1366; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
1367; SSSE3-NEXT:    pand %xmm10, %xmm7
1368; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
1369; SSSE3-NEXT:    por %xmm7, %xmm3
1370; SSSE3-NEXT:    pand %xmm3, %xmm0
1371; SSSE3-NEXT:    pandn %xmm8, %xmm3
1372; SSSE3-NEXT:    por %xmm0, %xmm3
1373; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1374; SSSE3-NEXT:    pxor %xmm4, %xmm0
1375; SSSE3-NEXT:    movdqa %xmm9, %xmm6
1376; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm6
1377; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1378; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
1379; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1380; SSSE3-NEXT:    pand %xmm7, %xmm0
1381; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
1382; SSSE3-NEXT:    por %xmm0, %xmm7
1383; SSSE3-NEXT:    pand %xmm7, %xmm1
1384; SSSE3-NEXT:    pandn %xmm8, %xmm7
1385; SSSE3-NEXT:    por %xmm1, %xmm7
1386; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
1387; SSSE3-NEXT:    movdqa %xmm7, %xmm0
1388; SSSE3-NEXT:    pxor %xmm4, %xmm0
1389; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840]
1390; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1391; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm1
1392; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1393; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
1394; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1395; SSSE3-NEXT:    pand %xmm6, %xmm0
1396; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1397; SSSE3-NEXT:    por %xmm0, %xmm1
1398; SSSE3-NEXT:    pand %xmm1, %xmm7
1399; SSSE3-NEXT:    pandn %xmm8, %xmm1
1400; SSSE3-NEXT:    por %xmm7, %xmm1
1401; SSSE3-NEXT:    movdqa %xmm3, %xmm0
1402; SSSE3-NEXT:    pxor %xmm4, %xmm0
1403; SSSE3-NEXT:    movdqa %xmm0, %xmm6
1404; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm6
1405; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
1406; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
1407; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1408; SSSE3-NEXT:    pand %xmm10, %xmm7
1409; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
1410; SSSE3-NEXT:    por %xmm7, %xmm0
1411; SSSE3-NEXT:    pand %xmm0, %xmm3
1412; SSSE3-NEXT:    pandn %xmm8, %xmm0
1413; SSSE3-NEXT:    por %xmm3, %xmm0
1414; SSSE3-NEXT:    packssdw %xmm1, %xmm0
1415; SSSE3-NEXT:    movdqa %xmm2, %xmm1
1416; SSSE3-NEXT:    pxor %xmm4, %xmm1
1417; SSSE3-NEXT:    movdqa %xmm1, %xmm3
1418; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm3
1419; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
1420; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
1421; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1422; SSSE3-NEXT:    pand %xmm6, %xmm1
1423; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1424; SSSE3-NEXT:    por %xmm1, %xmm3
1425; SSSE3-NEXT:    pand %xmm3, %xmm2
1426; SSSE3-NEXT:    pandn %xmm8, %xmm3
1427; SSSE3-NEXT:    por %xmm2, %xmm3
1428; SSSE3-NEXT:    pxor %xmm5, %xmm4
1429; SSSE3-NEXT:    movdqa %xmm4, %xmm1
1430; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm1
1431; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
1432; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm4
1433; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1434; SSSE3-NEXT:    pand %xmm2, %xmm4
1435; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1436; SSSE3-NEXT:    por %xmm4, %xmm1
1437; SSSE3-NEXT:    pand %xmm1, %xmm5
1438; SSSE3-NEXT:    pandn %xmm8, %xmm1
1439; SSSE3-NEXT:    por %xmm5, %xmm1
1440; SSSE3-NEXT:    packssdw %xmm3, %xmm1
1441; SSSE3-NEXT:    packssdw %xmm1, %xmm0
1442; SSSE3-NEXT:    retq
1443;
1444; SSE41-LABEL: trunc_ssat_v8i64_v8i8:
1445; SSE41:       # %bb.0:
1446; SSE41-NEXT:    movdqa %xmm0, %xmm10
1447; SSE41-NEXT:    movapd {{.*#+}} xmm7 = [127,127]
1448; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [2147483648,0,2147483648,0]
1449; SSE41-NEXT:    movdqa %xmm2, %xmm0
1450; SSE41-NEXT:    pxor %xmm5, %xmm0
1451; SSE41-NEXT:    movdqa {{.*#+}} xmm11 = [2147483775,2147483775]
1452; SSE41-NEXT:    movdqa %xmm11, %xmm6
1453; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
1454; SSE41-NEXT:    pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
1455; SSE41-NEXT:    pcmpeqd %xmm11, %xmm0
1456; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1457; SSE41-NEXT:    pand %xmm8, %xmm4
1458; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
1459; SSE41-NEXT:    por %xmm4, %xmm0
1460; SSE41-NEXT:    movapd %xmm7, %xmm8
1461; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm8
1462; SSE41-NEXT:    movdqa %xmm3, %xmm0
1463; SSE41-NEXT:    pxor %xmm5, %xmm0
1464; SSE41-NEXT:    movdqa %xmm11, %xmm2
1465; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
1466; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1467; SSE41-NEXT:    pcmpeqd %xmm11, %xmm0
1468; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1469; SSE41-NEXT:    pand %xmm4, %xmm6
1470; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1471; SSE41-NEXT:    por %xmm6, %xmm0
1472; SSE41-NEXT:    movapd %xmm7, %xmm9
1473; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm9
1474; SSE41-NEXT:    movdqa %xmm10, %xmm0
1475; SSE41-NEXT:    pxor %xmm5, %xmm0
1476; SSE41-NEXT:    movdqa %xmm11, %xmm2
1477; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
1478; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
1479; SSE41-NEXT:    pcmpeqd %xmm11, %xmm0
1480; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1481; SSE41-NEXT:    pand %xmm3, %xmm4
1482; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1483; SSE41-NEXT:    por %xmm4, %xmm0
1484; SSE41-NEXT:    movapd %xmm7, %xmm2
1485; SSE41-NEXT:    blendvpd %xmm0, %xmm10, %xmm2
1486; SSE41-NEXT:    movdqa %xmm1, %xmm0
1487; SSE41-NEXT:    pxor %xmm5, %xmm0
1488; SSE41-NEXT:    movdqa %xmm11, %xmm3
1489; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
1490; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
1491; SSE41-NEXT:    pcmpeqd %xmm11, %xmm0
1492; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1493; SSE41-NEXT:    pand %xmm4, %xmm6
1494; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1495; SSE41-NEXT:    por %xmm6, %xmm0
1496; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm7
1497; SSE41-NEXT:    movapd {{.*#+}} xmm3 = [18446744073709551488,18446744073709551488]
1498; SSE41-NEXT:    movapd %xmm7, %xmm0
1499; SSE41-NEXT:    xorpd %xmm5, %xmm0
1500; SSE41-NEXT:    movdqa {{.*#+}} xmm10 = [18446744071562067840,18446744071562067840]
1501; SSE41-NEXT:    movapd %xmm0, %xmm1
1502; SSE41-NEXT:    pcmpgtd %xmm10, %xmm1
1503; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1504; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
1505; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1506; SSE41-NEXT:    pand %xmm6, %xmm4
1507; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1508; SSE41-NEXT:    por %xmm4, %xmm0
1509; SSE41-NEXT:    movapd %xmm3, %xmm4
1510; SSE41-NEXT:    blendvpd %xmm0, %xmm7, %xmm4
1511; SSE41-NEXT:    movapd %xmm2, %xmm0
1512; SSE41-NEXT:    xorpd %xmm5, %xmm0
1513; SSE41-NEXT:    movapd %xmm0, %xmm1
1514; SSE41-NEXT:    pcmpgtd %xmm10, %xmm1
1515; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1516; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
1517; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1518; SSE41-NEXT:    pand %xmm6, %xmm7
1519; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1520; SSE41-NEXT:    por %xmm7, %xmm0
1521; SSE41-NEXT:    movapd %xmm3, %xmm1
1522; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
1523; SSE41-NEXT:    packssdw %xmm4, %xmm1
1524; SSE41-NEXT:    movapd %xmm9, %xmm0
1525; SSE41-NEXT:    xorpd %xmm5, %xmm0
1526; SSE41-NEXT:    movapd %xmm0, %xmm2
1527; SSE41-NEXT:    pcmpgtd %xmm10, %xmm2
1528; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1529; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
1530; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1531; SSE41-NEXT:    pand %xmm4, %xmm6
1532; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1533; SSE41-NEXT:    por %xmm6, %xmm0
1534; SSE41-NEXT:    movapd %xmm3, %xmm2
1535; SSE41-NEXT:    blendvpd %xmm0, %xmm9, %xmm2
1536; SSE41-NEXT:    xorpd %xmm8, %xmm5
1537; SSE41-NEXT:    movapd %xmm5, %xmm0
1538; SSE41-NEXT:    pcmpgtd %xmm10, %xmm0
1539; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
1540; SSE41-NEXT:    pcmpeqd %xmm10, %xmm5
1541; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1542; SSE41-NEXT:    pand %xmm4, %xmm5
1543; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1544; SSE41-NEXT:    por %xmm5, %xmm0
1545; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm3
1546; SSE41-NEXT:    packssdw %xmm2, %xmm3
1547; SSE41-NEXT:    packssdw %xmm3, %xmm1
1548; SSE41-NEXT:    movdqa %xmm1, %xmm0
1549; SSE41-NEXT:    retq
1550;
1551; AVX1-LABEL: trunc_ssat_v8i64_v8i8:
1552; AVX1:       # %bb.0:
1553; AVX1-NEXT:    vmovapd {{.*#+}} ymm2 = [127,127,127,127]
1554; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
1555; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [127,127]
1556; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
1557; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm5
1558; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm5, %ymm3
1559; AVX1-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1560; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1561; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
1562; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm4, %xmm4
1563; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
1564; AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1565; AVX1-NEXT:    vmovapd {{.*#+}} ymm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
1566; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1567; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488]
1568; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
1569; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
1570; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm5, %ymm3
1571; AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1572; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
1573; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
1574; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm4
1575; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
1576; AVX1-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1577; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1578; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
1579; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1580; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
1581; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1582; AVX1-NEXT:    vzeroupper
1583; AVX1-NEXT:    retq
1584;
1585; AVX2-LABEL: trunc_ssat_v8i64_v8i8:
1586; AVX2:       # %bb.0:
1587; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [127,127,127,127]
1588; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
1589; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1590; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
1591; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1592; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
1593; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
1594; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1595; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
1596; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1597; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
1598; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1599; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1600; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1601; AVX2-NEXT:    vzeroupper
1602; AVX2-NEXT:    retq
1603;
1604; AVX512-LABEL: trunc_ssat_v8i64_v8i8:
1605; AVX512:       # %bb.0:
1606; AVX512-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
1607; AVX512-NEXT:    vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
1608; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
1609; AVX512-NEXT:    vzeroupper
1610; AVX512-NEXT:    retq
1611  %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
1612  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
1613  %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
1614  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
1615  %5 = trunc <8 x i64> %4 to <8 x i8>
1616  ret <8 x i8> %5
1617}
1618
define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
; Signed-saturating truncation of <8 x i64> to <8 x i8> with a store to %p1:
; each lane is clamped to [-128, 127] (min-with-127, then max-with--128) and
; truncated to i8. Mirrors @trunc_ssat_v8i64_v8i8 above but returns via memory,
; which lets AVX512 lower the whole pattern to a single truncating store
; (vpmovsqb to (%rdi)) instead of materializing the vector in a register.
; NOTE: all CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit.
; SSE2-LABEL: trunc_ssat_v8i64_v8i8_store:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
; SSE2-NEXT:    movdqa %xmm3, %xmm5
; SSE2-NEXT:    pxor %xmm4, %xmm5
; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [2147483775,2147483775]
; SSE2-NEXT:    movdqa %xmm9, %xmm7
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm9, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
; SSE2-NEXT:    pand %xmm10, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; SSE2-NEXT:    por %xmm6, %xmm5
; SSE2-NEXT:    pand %xmm5, %xmm3
; SSE2-NEXT:    pandn %xmm8, %xmm5
; SSE2-NEXT:    por %xmm3, %xmm5
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm3
; SSE2-NEXT:    movdqa %xmm9, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm9, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
; SSE2-NEXT:    pand %xmm10, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
; SSE2-NEXT:    por %xmm7, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pandn %xmm8, %xmm3
; SSE2-NEXT:    por %xmm2, %xmm3
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    movdqa %xmm9, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm9, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm10, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
; SSE2-NEXT:    por %xmm7, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm8, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    movdqa %xmm9, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm7, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm7
; SSE2-NEXT:    pand %xmm7, %xmm0
; SSE2-NEXT:    pandn %xmm8, %xmm7
; SSE2-NEXT:    por %xmm0, %xmm7
; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
; SSE2-NEXT:    movdqa %xmm7, %xmm0
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm9, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm1[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm10, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT:    por %xmm6, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm7
; SSE2-NEXT:    pandn %xmm8, %xmm0
; SSE2-NEXT:    por %xmm7, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm9, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm7, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm7
; SSE2-NEXT:    pand %xmm7, %xmm2
; SSE2-NEXT:    pandn %xmm8, %xmm7
; SSE2-NEXT:    por %xmm2, %xmm7
; SSE2-NEXT:    movdqa %xmm3, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm9, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm3
; SSE2-NEXT:    pandn %xmm8, %xmm2
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    pxor %xmm5, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm9, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm9, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm5
; SSE2-NEXT:    pandn %xmm8, %xmm1
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    packuswb %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm3, %xmm7
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    packuswb %xmm7, %xmm0
; SSE2-NEXT:    packuswb %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movq %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc_ssat_v8i64_v8i8_store:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
; SSSE3-NEXT:    movdqa %xmm3, %xmm5
; SSSE3-NEXT:    pxor %xmm4, %xmm5
; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [2147483775,2147483775]
; SSSE3-NEXT:    movdqa %xmm9, %xmm7
; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm7
; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm5
; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
; SSSE3-NEXT:    pand %xmm10, %xmm6
; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; SSSE3-NEXT:    por %xmm6, %xmm5
; SSSE3-NEXT:    pand %xmm5, %xmm3
; SSSE3-NEXT:    pandn %xmm8, %xmm5
; SSSE3-NEXT:    por %xmm3, %xmm5
; SSSE3-NEXT:    movdqa %xmm2, %xmm3
; SSSE3-NEXT:    pxor %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm9, %xmm6
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm6
; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm3
; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
; SSSE3-NEXT:    pand %xmm10, %xmm7
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
; SSSE3-NEXT:    por %xmm7, %xmm3
; SSSE3-NEXT:    pand %xmm3, %xmm2
; SSSE3-NEXT:    pandn %xmm8, %xmm3
; SSSE3-NEXT:    por %xmm2, %xmm3
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pxor %xmm4, %xmm2
; SSSE3-NEXT:    movdqa %xmm9, %xmm6
; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm6
; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSSE3-NEXT:    pand %xmm10, %xmm7
; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
; SSSE3-NEXT:    por %xmm7, %xmm2
; SSSE3-NEXT:    pand %xmm2, %xmm1
; SSSE3-NEXT:    pandn %xmm8, %xmm2
; SSSE3-NEXT:    por %xmm1, %xmm2
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm4, %xmm1
; SSSE3-NEXT:    movdqa %xmm9, %xmm6
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm6
; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT:    pand %xmm7, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm7
; SSSE3-NEXT:    pand %xmm7, %xmm0
; SSSE3-NEXT:    pandn %xmm8, %xmm7
; SSSE3-NEXT:    por %xmm0, %xmm7
; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
; SSSE3-NEXT:    movdqa %xmm7, %xmm0
; SSSE3-NEXT:    pxor %xmm4, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840]
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm1[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm10, %xmm6
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSSE3-NEXT:    por %xmm6, %xmm0
; SSSE3-NEXT:    pand %xmm0, %xmm7
; SSSE3-NEXT:    pandn %xmm8, %xmm0
; SSSE3-NEXT:    por %xmm7, %xmm0
; SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSSE3-NEXT:    pxor %xmm4, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm6
; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm6
; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT:    pand %xmm7, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm7
; SSSE3-NEXT:    pand %xmm7, %xmm2
; SSSE3-NEXT:    pandn %xmm8, %xmm7
; SSSE3-NEXT:    por %xmm2, %xmm7
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    pxor %xmm4, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT:    pand %xmm6, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm2
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    pandn %xmm8, %xmm2
; SSSE3-NEXT:    por %xmm3, %xmm2
; SSSE3-NEXT:    pxor %xmm5, %xmm4
; SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm4
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm4
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT:    por %xmm4, %xmm1
; SSSE3-NEXT:    pand %xmm1, %xmm5
; SSSE3-NEXT:    pandn %xmm8, %xmm1
; SSSE3-NEXT:    por %xmm5, %xmm1
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pand %xmm3, %xmm2
; SSSE3-NEXT:    packuswb %xmm1, %xmm2
; SSSE3-NEXT:    pand %xmm3, %xmm7
; SSSE3-NEXT:    pand %xmm3, %xmm0
; SSSE3-NEXT:    packuswb %xmm7, %xmm0
; SSSE3-NEXT:    packuswb %xmm2, %xmm0
; SSSE3-NEXT:    packuswb %xmm0, %xmm0
; SSSE3-NEXT:    movq %xmm0, (%rdi)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_ssat_v8i64_v8i8_store:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm8
; SSE41-NEXT:    movapd {{.*#+}} xmm7 = [127,127]
; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [2147483648,0,2147483648,0]
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm10 = [2147483775,2147483775]
; SSE41-NEXT:    movdqa %xmm10, %xmm6
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm9, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    movapd %xmm7, %xmm9
; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm9
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm10, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm6, %xmm0
; SSE41-NEXT:    movapd %xmm7, %xmm11
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm11
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm10, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm6, %xmm0
; SSE41-NEXT:    movapd %xmm7, %xmm6
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm6
; SSE41-NEXT:    movdqa %xmm8, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm10, %xmm1
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm2, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm7
; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
; SSE41-NEXT:    movapd %xmm7, %xmm0
; SSE41-NEXT:    xorpd %xmm5, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm8 = [18446744071562067840,18446744071562067840]
; SSE41-NEXT:    movapd %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm8, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    movapd %xmm1, %xmm2
; SSE41-NEXT:    blendvpd %xmm0, %xmm7, %xmm2
; SSE41-NEXT:    movapd %xmm6, %xmm0
; SSE41-NEXT:    xorpd %xmm5, %xmm0
; SSE41-NEXT:    movapd %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm8, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm7
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm7, %xmm0
; SSE41-NEXT:    movapd %xmm1, %xmm7
; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm7
; SSE41-NEXT:    movapd %xmm11, %xmm0
; SSE41-NEXT:    xorpd %xmm5, %xmm0
; SSE41-NEXT:    movapd %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm8, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm6, %xmm0
; SSE41-NEXT:    movapd %xmm1, %xmm3
; SSE41-NEXT:    blendvpd %xmm0, %xmm11, %xmm3
; SSE41-NEXT:    xorpd %xmm9, %xmm5
; SSE41-NEXT:    movapd %xmm5, %xmm0
; SSE41-NEXT:    pcmpgtd %xmm8, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm8, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    por %xmm5, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm9, %xmm1
; SSE41-NEXT:    movapd {{.*#+}} xmm0 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE41-NEXT:    andpd %xmm0, %xmm1
; SSE41-NEXT:    andpd %xmm0, %xmm3
; SSE41-NEXT:    packusdw %xmm1, %xmm3
; SSE41-NEXT:    andpd %xmm0, %xmm7
; SSE41-NEXT:    andpd %xmm0, %xmm2
; SSE41-NEXT:    packusdw %xmm7, %xmm2
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    packuswb %xmm2, %xmm2
; SSE41-NEXT:    movq %xmm2, (%rdi)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v8i64_v8i8_store:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovapd {{.*#+}} ymm2 = [127,127,127,127]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [127,127]
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm5
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm5, %ymm3
; AVX1-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
; AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vmovapd {{.*#+}} ymm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488]
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm5, %ymm3
; AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
; AVX1-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovapd {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT:    vandpd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandpd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandpd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandpd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v8i64_v8i8_store:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [127,127,127,127]
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vmovq %xmm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_ssat_v8i64_v8i8_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsqb %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
; IR under test: smin(a0, 127) then smax(.., -128), i.e. a signed-saturation
; clamp expressed as icmp+select pairs, followed by trunc-to-i8 and a store.
; The store (rather than a return) is what allows the AVX512 truncating-store
; lowering checked above.
  %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
  %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
  %5 = trunc <8 x i64> %4 to <8 x i8>
  store <8 x i8> %5, <8 x i8> *%p1
  ret void
}
2056
2057define <16 x i8> @trunc_ssat_v16i64_v16i8(<16 x i64> %a0) {
2058; SSE2-LABEL: trunc_ssat_v16i64_v16i8:
2059; SSE2:       # %bb.0:
2060; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [127,127]
2061; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
2062; SSE2-NEXT:    movdqa %xmm6, %xmm9
2063; SSE2-NEXT:    pxor %xmm8, %xmm9
2064; SSE2-NEXT:    movdqa {{.*#+}} xmm11 = [2147483775,2147483775]
2065; SSE2-NEXT:    movdqa %xmm11, %xmm12
2066; SSE2-NEXT:    pcmpgtd %xmm9, %xmm12
2067; SSE2-NEXT:    pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
2068; SSE2-NEXT:    pcmpeqd %xmm11, %xmm9
2069; SSE2-NEXT:    pshufd {{.*#+}} xmm14 = xmm9[1,1,3,3]
2070; SSE2-NEXT:    pand %xmm13, %xmm14
2071; SSE2-NEXT:    pshufd {{.*#+}} xmm9 = xmm12[1,1,3,3]
2072; SSE2-NEXT:    por %xmm14, %xmm9
2073; SSE2-NEXT:    pand %xmm9, %xmm6
2074; SSE2-NEXT:    pandn %xmm10, %xmm9
2075; SSE2-NEXT:    por %xmm6, %xmm9
2076; SSE2-NEXT:    movdqa %xmm7, %xmm6
2077; SSE2-NEXT:    pxor %xmm8, %xmm6
2078; SSE2-NEXT:    movdqa %xmm11, %xmm12
2079; SSE2-NEXT:    pcmpgtd %xmm6, %xmm12
2080; SSE2-NEXT:    pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
2081; SSE2-NEXT:    pcmpeqd %xmm11, %xmm6
2082; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2083; SSE2-NEXT:    pand %xmm13, %xmm6
2084; SSE2-NEXT:    pshufd {{.*#+}} xmm12 = xmm12[1,1,3,3]
2085; SSE2-NEXT:    por %xmm6, %xmm12
2086; SSE2-NEXT:    pand %xmm12, %xmm7
2087; SSE2-NEXT:    pandn %xmm10, %xmm12
2088; SSE2-NEXT:    por %xmm7, %xmm12
2089; SSE2-NEXT:    movdqa %xmm4, %xmm6
2090; SSE2-NEXT:    pxor %xmm8, %xmm6
2091; SSE2-NEXT:    movdqa %xmm11, %xmm7
2092; SSE2-NEXT:    pcmpgtd %xmm6, %xmm7
2093; SSE2-NEXT:    pshufd {{.*#+}} xmm13 = xmm7[0,0,2,2]
2094; SSE2-NEXT:    pcmpeqd %xmm11, %xmm6
2095; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2096; SSE2-NEXT:    pand %xmm13, %xmm6
2097; SSE2-NEXT:    pshufd {{.*#+}} xmm13 = xmm7[1,1,3,3]
2098; SSE2-NEXT:    por %xmm6, %xmm13
2099; SSE2-NEXT:    pand %xmm13, %xmm4
2100; SSE2-NEXT:    pandn %xmm10, %xmm13
2101; SSE2-NEXT:    por %xmm4, %xmm13
2102; SSE2-NEXT:    movdqa %xmm5, %xmm4
2103; SSE2-NEXT:    pxor %xmm8, %xmm4
2104; SSE2-NEXT:    movdqa %xmm11, %xmm6
2105; SSE2-NEXT:    pcmpgtd %xmm4, %xmm6
2106; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2107; SSE2-NEXT:    pcmpeqd %xmm11, %xmm4
2108; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2109; SSE2-NEXT:    pand %xmm7, %xmm4
2110; SSE2-NEXT:    pshufd {{.*#+}} xmm14 = xmm6[1,1,3,3]
2111; SSE2-NEXT:    por %xmm4, %xmm14
2112; SSE2-NEXT:    pand %xmm14, %xmm5
2113; SSE2-NEXT:    pandn %xmm10, %xmm14
2114; SSE2-NEXT:    por %xmm5, %xmm14
2115; SSE2-NEXT:    movdqa %xmm2, %xmm4
2116; SSE2-NEXT:    pxor %xmm8, %xmm4
2117; SSE2-NEXT:    movdqa %xmm11, %xmm5
2118; SSE2-NEXT:    pcmpgtd %xmm4, %xmm5
2119; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
2120; SSE2-NEXT:    pcmpeqd %xmm11, %xmm4
2121; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2122; SSE2-NEXT:    pand %xmm6, %xmm4
2123; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2124; SSE2-NEXT:    por %xmm4, %xmm5
2125; SSE2-NEXT:    pand %xmm5, %xmm2
2126; SSE2-NEXT:    pandn %xmm10, %xmm5
2127; SSE2-NEXT:    por %xmm2, %xmm5
2128; SSE2-NEXT:    movdqa %xmm3, %xmm2
2129; SSE2-NEXT:    pxor %xmm8, %xmm2
2130; SSE2-NEXT:    movdqa %xmm11, %xmm4
2131; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
2132; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2133; SSE2-NEXT:    pcmpeqd %xmm11, %xmm2
2134; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2135; SSE2-NEXT:    pand %xmm6, %xmm2
2136; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
2137; SSE2-NEXT:    por %xmm2, %xmm6
2138; SSE2-NEXT:    pand %xmm6, %xmm3
2139; SSE2-NEXT:    pandn %xmm10, %xmm6
2140; SSE2-NEXT:    por %xmm3, %xmm6
2141; SSE2-NEXT:    movdqa %xmm0, %xmm2
2142; SSE2-NEXT:    pxor %xmm8, %xmm2
2143; SSE2-NEXT:    movdqa %xmm11, %xmm3
2144; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
2145; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2146; SSE2-NEXT:    pcmpeqd %xmm11, %xmm2
2147; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2148; SSE2-NEXT:    pand %xmm4, %xmm2
2149; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2150; SSE2-NEXT:    por %xmm2, %xmm3
2151; SSE2-NEXT:    pand %xmm3, %xmm0
2152; SSE2-NEXT:    pandn %xmm10, %xmm3
2153; SSE2-NEXT:    por %xmm0, %xmm3
2154; SSE2-NEXT:    movdqa %xmm1, %xmm0
2155; SSE2-NEXT:    pxor %xmm8, %xmm0
2156; SSE2-NEXT:    movdqa %xmm11, %xmm2
2157; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
2158; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2159; SSE2-NEXT:    pcmpeqd %xmm11, %xmm0
2160; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2161; SSE2-NEXT:    pand %xmm4, %xmm0
2162; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2163; SSE2-NEXT:    por %xmm0, %xmm4
2164; SSE2-NEXT:    pand %xmm4, %xmm1
2165; SSE2-NEXT:    pandn %xmm10, %xmm4
2166; SSE2-NEXT:    por %xmm1, %xmm4
2167; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [18446744073709551488,18446744073709551488]
2168; SSE2-NEXT:    movdqa %xmm4, %xmm0
2169; SSE2-NEXT:    pxor %xmm8, %xmm0
2170; SSE2-NEXT:    movdqa {{.*#+}} xmm11 = [18446744071562067840,18446744071562067840]
2171; SSE2-NEXT:    movdqa %xmm0, %xmm1
2172; SSE2-NEXT:    pcmpgtd %xmm11, %xmm1
2173; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
2174; SSE2-NEXT:    pcmpeqd %xmm11, %xmm0
2175; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2176; SSE2-NEXT:    pand %xmm2, %xmm0
2177; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2178; SSE2-NEXT:    por %xmm0, %xmm1
2179; SSE2-NEXT:    pand %xmm1, %xmm4
2180; SSE2-NEXT:    pandn %xmm10, %xmm1
2181; SSE2-NEXT:    por %xmm4, %xmm1
2182; SSE2-NEXT:    movdqa %xmm3, %xmm0
2183; SSE2-NEXT:    pxor %xmm8, %xmm0
2184; SSE2-NEXT:    movdqa %xmm0, %xmm2
2185; SSE2-NEXT:    pcmpgtd %xmm11, %xmm2
2186; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2187; SSE2-NEXT:    pcmpeqd %xmm11, %xmm0
2188; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2189; SSE2-NEXT:    pand %xmm4, %xmm7
2190; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2191; SSE2-NEXT:    por %xmm7, %xmm0
2192; SSE2-NEXT:    pand %xmm0, %xmm3
2193; SSE2-NEXT:    pandn %xmm10, %xmm0
2194; SSE2-NEXT:    por %xmm3, %xmm0
2195; SSE2-NEXT:    packssdw %xmm1, %xmm0
2196; SSE2-NEXT:    movdqa %xmm6, %xmm1
2197; SSE2-NEXT:    pxor %xmm8, %xmm1
2198; SSE2-NEXT:    movdqa %xmm1, %xmm2
2199; SSE2-NEXT:    pcmpgtd %xmm11, %xmm2
2200; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2201; SSE2-NEXT:    pcmpeqd %xmm11, %xmm1
2202; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2203; SSE2-NEXT:    pand %xmm3, %xmm1
2204; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2205; SSE2-NEXT:    por %xmm1, %xmm2
2206; SSE2-NEXT:    pand %xmm2, %xmm6
2207; SSE2-NEXT:    pandn %xmm10, %xmm2
2208; SSE2-NEXT:    por %xmm6, %xmm2
2209; SSE2-NEXT:    movdqa %xmm5, %xmm1
2210; SSE2-NEXT:    pxor %xmm8, %xmm1
2211; SSE2-NEXT:    movdqa %xmm1, %xmm3
2212; SSE2-NEXT:    pcmpgtd %xmm11, %xmm3
2213; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2214; SSE2-NEXT:    pcmpeqd %xmm11, %xmm1
2215; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2216; SSE2-NEXT:    pand %xmm4, %xmm1
2217; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2218; SSE2-NEXT:    por %xmm1, %xmm3
2219; SSE2-NEXT:    pand %xmm3, %xmm5
2220; SSE2-NEXT:    pandn %xmm10, %xmm3
2221; SSE2-NEXT:    por %xmm5, %xmm3
2222; SSE2-NEXT:    packssdw %xmm2, %xmm3
2223; SSE2-NEXT:    packssdw %xmm3, %xmm0
2224; SSE2-NEXT:    movdqa %xmm14, %xmm1
2225; SSE2-NEXT:    pxor %xmm8, %xmm1
2226; SSE2-NEXT:    movdqa %xmm1, %xmm2
2227; SSE2-NEXT:    pcmpgtd %xmm11, %xmm2
2228; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2229; SSE2-NEXT:    pcmpeqd %xmm11, %xmm1
2230; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2231; SSE2-NEXT:    pand %xmm3, %xmm1
2232; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2233; SSE2-NEXT:    por %xmm1, %xmm2
2234; SSE2-NEXT:    pand %xmm2, %xmm14
2235; SSE2-NEXT:    pandn %xmm10, %xmm2
2236; SSE2-NEXT:    por %xmm14, %xmm2
2237; SSE2-NEXT:    movdqa %xmm13, %xmm1
2238; SSE2-NEXT:    pxor %xmm8, %xmm1
2239; SSE2-NEXT:    movdqa %xmm1, %xmm3
2240; SSE2-NEXT:    pcmpgtd %xmm11, %xmm3
2241; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2242; SSE2-NEXT:    pcmpeqd %xmm11, %xmm1
2243; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2244; SSE2-NEXT:    pand %xmm4, %xmm1
2245; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2246; SSE2-NEXT:    por %xmm1, %xmm3
2247; SSE2-NEXT:    pand %xmm3, %xmm13
2248; SSE2-NEXT:    pandn %xmm10, %xmm3
2249; SSE2-NEXT:    por %xmm13, %xmm3
2250; SSE2-NEXT:    packssdw %xmm2, %xmm3
2251; SSE2-NEXT:    movdqa %xmm12, %xmm1
2252; SSE2-NEXT:    pxor %xmm8, %xmm1
2253; SSE2-NEXT:    movdqa %xmm1, %xmm2
2254; SSE2-NEXT:    pcmpgtd %xmm11, %xmm2
2255; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2256; SSE2-NEXT:    pcmpeqd %xmm11, %xmm1
2257; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2258; SSE2-NEXT:    pand %xmm4, %xmm1
2259; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2260; SSE2-NEXT:    por %xmm1, %xmm2
2261; SSE2-NEXT:    pand %xmm2, %xmm12
2262; SSE2-NEXT:    pandn %xmm10, %xmm2
2263; SSE2-NEXT:    por %xmm12, %xmm2
2264; SSE2-NEXT:    pxor %xmm9, %xmm8
2265; SSE2-NEXT:    movdqa %xmm8, %xmm1
2266; SSE2-NEXT:    pcmpgtd %xmm11, %xmm1
2267; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
2268; SSE2-NEXT:    pcmpeqd %xmm11, %xmm8
2269; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm8[1,1,3,3]
2270; SSE2-NEXT:    pand %xmm4, %xmm5
2271; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2272; SSE2-NEXT:    por %xmm5, %xmm1
2273; SSE2-NEXT:    pand %xmm1, %xmm9
2274; SSE2-NEXT:    pandn %xmm10, %xmm1
2275; SSE2-NEXT:    por %xmm9, %xmm1
2276; SSE2-NEXT:    packssdw %xmm2, %xmm1
2277; SSE2-NEXT:    packssdw %xmm1, %xmm3
2278; SSE2-NEXT:    packsswb %xmm3, %xmm0
2279; SSE2-NEXT:    retq
2280;
2281; SSSE3-LABEL: trunc_ssat_v16i64_v16i8:
2282; SSSE3:       # %bb.0:
2283; SSSE3-NEXT:    movdqa {{.*#+}} xmm10 = [127,127]
2284; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
2285; SSSE3-NEXT:    movdqa %xmm6, %xmm9
2286; SSSE3-NEXT:    pxor %xmm8, %xmm9
2287; SSSE3-NEXT:    movdqa {{.*#+}} xmm11 = [2147483775,2147483775]
2288; SSSE3-NEXT:    movdqa %xmm11, %xmm12
2289; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm12
2290; SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
2291; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm9
2292; SSSE3-NEXT:    pshufd {{.*#+}} xmm14 = xmm9[1,1,3,3]
2293; SSSE3-NEXT:    pand %xmm13, %xmm14
2294; SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm12[1,1,3,3]
2295; SSSE3-NEXT:    por %xmm14, %xmm9
2296; SSSE3-NEXT:    pand %xmm9, %xmm6
2297; SSSE3-NEXT:    pandn %xmm10, %xmm9
2298; SSSE3-NEXT:    por %xmm6, %xmm9
2299; SSSE3-NEXT:    movdqa %xmm7, %xmm6
2300; SSSE3-NEXT:    pxor %xmm8, %xmm6
2301; SSSE3-NEXT:    movdqa %xmm11, %xmm12
2302; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm12
2303; SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
2304; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm6
2305; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2306; SSSE3-NEXT:    pand %xmm13, %xmm6
2307; SSSE3-NEXT:    pshufd {{.*#+}} xmm12 = xmm12[1,1,3,3]
2308; SSSE3-NEXT:    por %xmm6, %xmm12
2309; SSSE3-NEXT:    pand %xmm12, %xmm7
2310; SSSE3-NEXT:    pandn %xmm10, %xmm12
2311; SSSE3-NEXT:    por %xmm7, %xmm12
2312; SSSE3-NEXT:    movdqa %xmm4, %xmm6
2313; SSSE3-NEXT:    pxor %xmm8, %xmm6
2314; SSSE3-NEXT:    movdqa %xmm11, %xmm7
2315; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm7
2316; SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm7[0,0,2,2]
2317; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm6
2318; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2319; SSSE3-NEXT:    pand %xmm13, %xmm6
2320; SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm7[1,1,3,3]
2321; SSSE3-NEXT:    por %xmm6, %xmm13
2322; SSSE3-NEXT:    pand %xmm13, %xmm4
2323; SSSE3-NEXT:    pandn %xmm10, %xmm13
2324; SSSE3-NEXT:    por %xmm4, %xmm13
2325; SSSE3-NEXT:    movdqa %xmm5, %xmm4
2326; SSSE3-NEXT:    pxor %xmm8, %xmm4
2327; SSSE3-NEXT:    movdqa %xmm11, %xmm6
2328; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm6
2329; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2330; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm4
2331; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2332; SSSE3-NEXT:    pand %xmm7, %xmm4
2333; SSSE3-NEXT:    pshufd {{.*#+}} xmm14 = xmm6[1,1,3,3]
2334; SSSE3-NEXT:    por %xmm4, %xmm14
2335; SSSE3-NEXT:    pand %xmm14, %xmm5
2336; SSSE3-NEXT:    pandn %xmm10, %xmm14
2337; SSSE3-NEXT:    por %xmm5, %xmm14
2338; SSSE3-NEXT:    movdqa %xmm2, %xmm4
2339; SSSE3-NEXT:    pxor %xmm8, %xmm4
2340; SSSE3-NEXT:    movdqa %xmm11, %xmm5
2341; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm5
2342; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
2343; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm4
2344; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2345; SSSE3-NEXT:    pand %xmm6, %xmm4
2346; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2347; SSSE3-NEXT:    por %xmm4, %xmm5
2348; SSSE3-NEXT:    pand %xmm5, %xmm2
2349; SSSE3-NEXT:    pandn %xmm10, %xmm5
2350; SSSE3-NEXT:    por %xmm2, %xmm5
2351; SSSE3-NEXT:    movdqa %xmm3, %xmm2
2352; SSSE3-NEXT:    pxor %xmm8, %xmm2
2353; SSSE3-NEXT:    movdqa %xmm11, %xmm4
2354; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm4
2355; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2356; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm2
2357; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2358; SSSE3-NEXT:    pand %xmm6, %xmm2
2359; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
2360; SSSE3-NEXT:    por %xmm2, %xmm6
2361; SSSE3-NEXT:    pand %xmm6, %xmm3
2362; SSSE3-NEXT:    pandn %xmm10, %xmm6
2363; SSSE3-NEXT:    por %xmm3, %xmm6
2364; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2365; SSSE3-NEXT:    pxor %xmm8, %xmm2
2366; SSSE3-NEXT:    movdqa %xmm11, %xmm3
2367; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
2368; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2369; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm2
2370; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2371; SSSE3-NEXT:    pand %xmm4, %xmm2
2372; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2373; SSSE3-NEXT:    por %xmm2, %xmm3
2374; SSSE3-NEXT:    pand %xmm3, %xmm0
2375; SSSE3-NEXT:    pandn %xmm10, %xmm3
2376; SSSE3-NEXT:    por %xmm0, %xmm3
2377; SSSE3-NEXT:    movdqa %xmm1, %xmm0
2378; SSSE3-NEXT:    pxor %xmm8, %xmm0
2379; SSSE3-NEXT:    movdqa %xmm11, %xmm2
2380; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
2381; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2382; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm0
2383; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2384; SSSE3-NEXT:    pand %xmm4, %xmm0
2385; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2386; SSSE3-NEXT:    por %xmm0, %xmm4
2387; SSSE3-NEXT:    pand %xmm4, %xmm1
2388; SSSE3-NEXT:    pandn %xmm10, %xmm4
2389; SSSE3-NEXT:    por %xmm1, %xmm4
2390; SSSE3-NEXT:    movdqa {{.*#+}} xmm10 = [18446744073709551488,18446744073709551488]
2391; SSSE3-NEXT:    movdqa %xmm4, %xmm0
2392; SSSE3-NEXT:    pxor %xmm8, %xmm0
2393; SSSE3-NEXT:    movdqa {{.*#+}} xmm11 = [18446744071562067840,18446744071562067840]
2394; SSSE3-NEXT:    movdqa %xmm0, %xmm1
2395; SSSE3-NEXT:    pcmpgtd %xmm11, %xmm1
2396; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
2397; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm0
2398; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2399; SSSE3-NEXT:    pand %xmm2, %xmm0
2400; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2401; SSSE3-NEXT:    por %xmm0, %xmm1
2402; SSSE3-NEXT:    pand %xmm1, %xmm4
2403; SSSE3-NEXT:    pandn %xmm10, %xmm1
2404; SSSE3-NEXT:    por %xmm4, %xmm1
2405; SSSE3-NEXT:    movdqa %xmm3, %xmm0
2406; SSSE3-NEXT:    pxor %xmm8, %xmm0
2407; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2408; SSSE3-NEXT:    pcmpgtd %xmm11, %xmm2
2409; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2410; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm0
2411; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2412; SSSE3-NEXT:    pand %xmm4, %xmm7
2413; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2414; SSSE3-NEXT:    por %xmm7, %xmm0
2415; SSSE3-NEXT:    pand %xmm0, %xmm3
2416; SSSE3-NEXT:    pandn %xmm10, %xmm0
2417; SSSE3-NEXT:    por %xmm3, %xmm0
2418; SSSE3-NEXT:    packssdw %xmm1, %xmm0
2419; SSSE3-NEXT:    movdqa %xmm6, %xmm1
2420; SSSE3-NEXT:    pxor %xmm8, %xmm1
2421; SSSE3-NEXT:    movdqa %xmm1, %xmm2
2422; SSSE3-NEXT:    pcmpgtd %xmm11, %xmm2
2423; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2424; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm1
2425; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2426; SSSE3-NEXT:    pand %xmm3, %xmm1
2427; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2428; SSSE3-NEXT:    por %xmm1, %xmm2
2429; SSSE3-NEXT:    pand %xmm2, %xmm6
2430; SSSE3-NEXT:    pandn %xmm10, %xmm2
2431; SSSE3-NEXT:    por %xmm6, %xmm2
2432; SSSE3-NEXT:    movdqa %xmm5, %xmm1
2433; SSSE3-NEXT:    pxor %xmm8, %xmm1
2434; SSSE3-NEXT:    movdqa %xmm1, %xmm3
2435; SSSE3-NEXT:    pcmpgtd %xmm11, %xmm3
2436; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2437; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm1
2438; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2439; SSSE3-NEXT:    pand %xmm4, %xmm1
2440; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2441; SSSE3-NEXT:    por %xmm1, %xmm3
2442; SSSE3-NEXT:    pand %xmm3, %xmm5
2443; SSSE3-NEXT:    pandn %xmm10, %xmm3
2444; SSSE3-NEXT:    por %xmm5, %xmm3
2445; SSSE3-NEXT:    packssdw %xmm2, %xmm3
2446; SSSE3-NEXT:    packssdw %xmm3, %xmm0
2447; SSSE3-NEXT:    movdqa %xmm14, %xmm1
2448; SSSE3-NEXT:    pxor %xmm8, %xmm1
2449; SSSE3-NEXT:    movdqa %xmm1, %xmm2
2450; SSSE3-NEXT:    pcmpgtd %xmm11, %xmm2
2451; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2452; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm1
2453; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2454; SSSE3-NEXT:    pand %xmm3, %xmm1
2455; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2456; SSSE3-NEXT:    por %xmm1, %xmm2
2457; SSSE3-NEXT:    pand %xmm2, %xmm14
2458; SSSE3-NEXT:    pandn %xmm10, %xmm2
2459; SSSE3-NEXT:    por %xmm14, %xmm2
2460; SSSE3-NEXT:    movdqa %xmm13, %xmm1
2461; SSSE3-NEXT:    pxor %xmm8, %xmm1
2462; SSSE3-NEXT:    movdqa %xmm1, %xmm3
2463; SSSE3-NEXT:    pcmpgtd %xmm11, %xmm3
2464; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2465; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm1
2466; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2467; SSSE3-NEXT:    pand %xmm4, %xmm1
2468; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2469; SSSE3-NEXT:    por %xmm1, %xmm3
2470; SSSE3-NEXT:    pand %xmm3, %xmm13
2471; SSSE3-NEXT:    pandn %xmm10, %xmm3
2472; SSSE3-NEXT:    por %xmm13, %xmm3
2473; SSSE3-NEXT:    packssdw %xmm2, %xmm3
2474; SSSE3-NEXT:    movdqa %xmm12, %xmm1
2475; SSSE3-NEXT:    pxor %xmm8, %xmm1
2476; SSSE3-NEXT:    movdqa %xmm1, %xmm2
2477; SSSE3-NEXT:    pcmpgtd %xmm11, %xmm2
2478; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2479; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm1
2480; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2481; SSSE3-NEXT:    pand %xmm4, %xmm1
2482; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2483; SSSE3-NEXT:    por %xmm1, %xmm2
2484; SSSE3-NEXT:    pand %xmm2, %xmm12
2485; SSSE3-NEXT:    pandn %xmm10, %xmm2
2486; SSSE3-NEXT:    por %xmm12, %xmm2
2487; SSSE3-NEXT:    pxor %xmm9, %xmm8
2488; SSSE3-NEXT:    movdqa %xmm8, %xmm1
2489; SSSE3-NEXT:    pcmpgtd %xmm11, %xmm1
2490; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
2491; SSSE3-NEXT:    pcmpeqd %xmm11, %xmm8
2492; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm8[1,1,3,3]
2493; SSSE3-NEXT:    pand %xmm4, %xmm5
2494; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2495; SSSE3-NEXT:    por %xmm5, %xmm1
2496; SSSE3-NEXT:    pand %xmm1, %xmm9
2497; SSSE3-NEXT:    pandn %xmm10, %xmm1
2498; SSSE3-NEXT:    por %xmm9, %xmm1
2499; SSSE3-NEXT:    packssdw %xmm2, %xmm1
2500; SSSE3-NEXT:    packssdw %xmm1, %xmm3
2501; SSSE3-NEXT:    packsswb %xmm3, %xmm0
2502; SSSE3-NEXT:    retq
2503;
2504; SSE41-LABEL: trunc_ssat_v16i64_v16i8:
2505; SSE41:       # %bb.0:
2506; SSE41-NEXT:    movdqa %xmm0, %xmm8
2507; SSE41-NEXT:    movapd {{.*#+}} xmm11 = [127,127]
2508; SSE41-NEXT:    movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
2509; SSE41-NEXT:    movdqa %xmm6, %xmm0
2510; SSE41-NEXT:    pxor %xmm9, %xmm0
2511; SSE41-NEXT:    movdqa {{.*#+}} xmm12 = [2147483775,2147483775]
2512; SSE41-NEXT:    movdqa %xmm12, %xmm10
2513; SSE41-NEXT:    pcmpgtd %xmm0, %xmm10
2514; SSE41-NEXT:    pshufd {{.*#+}} xmm13 = xmm10[0,0,2,2]
2515; SSE41-NEXT:    pcmpeqd %xmm12, %xmm0
2516; SSE41-NEXT:    pshufd {{.*#+}} xmm14 = xmm0[1,1,3,3]
2517; SSE41-NEXT:    pand %xmm13, %xmm14
2518; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
2519; SSE41-NEXT:    por %xmm14, %xmm0
2520; SSE41-NEXT:    movapd %xmm11, %xmm10
2521; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm10
2522; SSE41-NEXT:    movdqa %xmm7, %xmm0
2523; SSE41-NEXT:    pxor %xmm9, %xmm0
2524; SSE41-NEXT:    movdqa %xmm12, %xmm6
2525; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
2526; SSE41-NEXT:    pshufd {{.*#+}} xmm13 = xmm6[0,0,2,2]
2527; SSE41-NEXT:    pcmpeqd %xmm12, %xmm0
2528; SSE41-NEXT:    pshufd {{.*#+}} xmm14 = xmm0[1,1,3,3]
2529; SSE41-NEXT:    pand %xmm13, %xmm14
2530; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2531; SSE41-NEXT:    por %xmm14, %xmm0
2532; SSE41-NEXT:    movapd %xmm11, %xmm13
2533; SSE41-NEXT:    blendvpd %xmm0, %xmm7, %xmm13
2534; SSE41-NEXT:    movdqa %xmm4, %xmm0
2535; SSE41-NEXT:    pxor %xmm9, %xmm0
2536; SSE41-NEXT:    movdqa %xmm12, %xmm6
2537; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
2538; SSE41-NEXT:    pshufd {{.*#+}} xmm14 = xmm6[0,0,2,2]
2539; SSE41-NEXT:    pcmpeqd %xmm12, %xmm0
2540; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2541; SSE41-NEXT:    pand %xmm14, %xmm7
2542; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2543; SSE41-NEXT:    por %xmm7, %xmm0
2544; SSE41-NEXT:    movapd %xmm11, %xmm14
2545; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm14
2546; SSE41-NEXT:    movdqa %xmm5, %xmm0
2547; SSE41-NEXT:    pxor %xmm9, %xmm0
2548; SSE41-NEXT:    movdqa %xmm12, %xmm4
2549; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
2550; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2551; SSE41-NEXT:    pcmpeqd %xmm12, %xmm0
2552; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2553; SSE41-NEXT:    pand %xmm6, %xmm7
2554; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2555; SSE41-NEXT:    por %xmm7, %xmm0
2556; SSE41-NEXT:    movapd %xmm11, %xmm15
2557; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm15
2558; SSE41-NEXT:    movdqa %xmm2, %xmm0
2559; SSE41-NEXT:    pxor %xmm9, %xmm0
2560; SSE41-NEXT:    movdqa %xmm12, %xmm5
2561; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
2562; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
2563; SSE41-NEXT:    pcmpeqd %xmm12, %xmm0
2564; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2565; SSE41-NEXT:    pand %xmm6, %xmm7
2566; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
2567; SSE41-NEXT:    por %xmm7, %xmm0
2568; SSE41-NEXT:    movapd %xmm11, %xmm5
2569; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm5
2570; SSE41-NEXT:    movdqa %xmm3, %xmm0
2571; SSE41-NEXT:    pxor %xmm9, %xmm0
2572; SSE41-NEXT:    movdqa %xmm12, %xmm2
2573; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
2574; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
2575; SSE41-NEXT:    pcmpeqd %xmm12, %xmm0
2576; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2577; SSE41-NEXT:    pand %xmm6, %xmm7
2578; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2579; SSE41-NEXT:    por %xmm7, %xmm0
2580; SSE41-NEXT:    movapd %xmm11, %xmm6
2581; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm6
2582; SSE41-NEXT:    movdqa %xmm8, %xmm0
2583; SSE41-NEXT:    pxor %xmm9, %xmm0
2584; SSE41-NEXT:    movdqa %xmm12, %xmm2
2585; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
2586; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2587; SSE41-NEXT:    pcmpeqd %xmm12, %xmm0
2588; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2589; SSE41-NEXT:    pand %xmm3, %xmm7
2590; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2591; SSE41-NEXT:    por %xmm7, %xmm0
2592; SSE41-NEXT:    movapd %xmm11, %xmm7
2593; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm7
2594; SSE41-NEXT:    movdqa %xmm1, %xmm0
2595; SSE41-NEXT:    pxor %xmm9, %xmm0
2596; SSE41-NEXT:    movdqa %xmm12, %xmm2
2597; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
2598; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2599; SSE41-NEXT:    pcmpeqd %xmm12, %xmm0
2600; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2601; SSE41-NEXT:    pand %xmm3, %xmm4
2602; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2603; SSE41-NEXT:    por %xmm4, %xmm0
2604; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm11
2605; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [18446744073709551488,18446744073709551488]
2606; SSE41-NEXT:    movapd %xmm11, %xmm0
2607; SSE41-NEXT:    xorpd %xmm9, %xmm0
2608; SSE41-NEXT:    movdqa {{.*#+}} xmm8 = [18446744071562067840,18446744071562067840]
2609; SSE41-NEXT:    movapd %xmm0, %xmm1
2610; SSE41-NEXT:    pcmpgtd %xmm8, %xmm1
2611; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
2612; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
2613; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2614; SSE41-NEXT:    pand %xmm4, %xmm3
2615; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2616; SSE41-NEXT:    por %xmm3, %xmm0
2617; SSE41-NEXT:    movapd %xmm2, %xmm3
2618; SSE41-NEXT:    blendvpd %xmm0, %xmm11, %xmm3
2619; SSE41-NEXT:    movapd %xmm7, %xmm0
2620; SSE41-NEXT:    xorpd %xmm9, %xmm0
2621; SSE41-NEXT:    movapd %xmm0, %xmm1
2622; SSE41-NEXT:    pcmpgtd %xmm8, %xmm1
2623; SSE41-NEXT:    pshufd {{.*#+}} xmm11 = xmm1[0,0,2,2]
2624; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
2625; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2626; SSE41-NEXT:    pand %xmm11, %xmm4
2627; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2628; SSE41-NEXT:    por %xmm4, %xmm0
2629; SSE41-NEXT:    movapd %xmm2, %xmm1
2630; SSE41-NEXT:    blendvpd %xmm0, %xmm7, %xmm1
2631; SSE41-NEXT:    packssdw %xmm3, %xmm1
2632; SSE41-NEXT:    movapd %xmm6, %xmm0
2633; SSE41-NEXT:    xorpd %xmm9, %xmm0
2634; SSE41-NEXT:    movapd %xmm0, %xmm3
2635; SSE41-NEXT:    pcmpgtd %xmm8, %xmm3
2636; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2637; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
2638; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2639; SSE41-NEXT:    pand %xmm4, %xmm7
2640; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2641; SSE41-NEXT:    por %xmm7, %xmm0
2642; SSE41-NEXT:    movapd %xmm2, %xmm3
2643; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm3
2644; SSE41-NEXT:    movapd %xmm5, %xmm0
2645; SSE41-NEXT:    xorpd %xmm9, %xmm0
2646; SSE41-NEXT:    movapd %xmm0, %xmm4
2647; SSE41-NEXT:    pcmpgtd %xmm8, %xmm4
2648; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2649; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
2650; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2651; SSE41-NEXT:    pand %xmm6, %xmm7
2652; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2653; SSE41-NEXT:    por %xmm7, %xmm0
2654; SSE41-NEXT:    movapd %xmm2, %xmm4
2655; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm4
2656; SSE41-NEXT:    packssdw %xmm3, %xmm4
2657; SSE41-NEXT:    packssdw %xmm4, %xmm1
2658; SSE41-NEXT:    movapd %xmm15, %xmm0
2659; SSE41-NEXT:    xorpd %xmm9, %xmm0
2660; SSE41-NEXT:    movapd %xmm0, %xmm3
2661; SSE41-NEXT:    pcmpgtd %xmm8, %xmm3
2662; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2663; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
2664; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
2665; SSE41-NEXT:    pand %xmm4, %xmm5
2666; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2667; SSE41-NEXT:    por %xmm5, %xmm0
2668; SSE41-NEXT:    movapd %xmm2, %xmm3
2669; SSE41-NEXT:    blendvpd %xmm0, %xmm15, %xmm3
2670; SSE41-NEXT:    movapd %xmm14, %xmm0
2671; SSE41-NEXT:    xorpd %xmm9, %xmm0
2672; SSE41-NEXT:    movapd %xmm0, %xmm4
2673; SSE41-NEXT:    pcmpgtd %xmm8, %xmm4
2674; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2675; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
2676; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2677; SSE41-NEXT:    pand %xmm5, %xmm6
2678; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2679; SSE41-NEXT:    por %xmm6, %xmm0
2680; SSE41-NEXT:    movapd %xmm2, %xmm4
2681; SSE41-NEXT:    blendvpd %xmm0, %xmm14, %xmm4
2682; SSE41-NEXT:    packssdw %xmm3, %xmm4
2683; SSE41-NEXT:    movapd %xmm13, %xmm0
2684; SSE41-NEXT:    xorpd %xmm9, %xmm0
2685; SSE41-NEXT:    movapd %xmm0, %xmm3
2686; SSE41-NEXT:    pcmpgtd %xmm8, %xmm3
2687; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
2688; SSE41-NEXT:    pcmpeqd %xmm8, %xmm0
2689; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2690; SSE41-NEXT:    pand %xmm5, %xmm6
2691; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2692; SSE41-NEXT:    por %xmm6, %xmm0
2693; SSE41-NEXT:    movapd %xmm2, %xmm3
2694; SSE41-NEXT:    blendvpd %xmm0, %xmm13, %xmm3
2695; SSE41-NEXT:    xorpd %xmm10, %xmm9
2696; SSE41-NEXT:    movapd %xmm9, %xmm0
2697; SSE41-NEXT:    pcmpgtd %xmm8, %xmm0
2698; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[0,0,2,2]
2699; SSE41-NEXT:    pcmpeqd %xmm8, %xmm9
2700; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm9[1,1,3,3]
2701; SSE41-NEXT:    pand %xmm5, %xmm6
2702; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2703; SSE41-NEXT:    por %xmm6, %xmm0
2704; SSE41-NEXT:    blendvpd %xmm0, %xmm10, %xmm2
2705; SSE41-NEXT:    packssdw %xmm3, %xmm2
2706; SSE41-NEXT:    packssdw %xmm2, %xmm4
2707; SSE41-NEXT:    packsswb %xmm4, %xmm1
2708; SSE41-NEXT:    movdqa %xmm1, %xmm0
2709; SSE41-NEXT:    retq
2710;
2711; AVX1-LABEL: trunc_ssat_v16i64_v16i8:
2712; AVX1:       # %bb.0:
2713; AVX1-NEXT:    vmovapd {{.*#+}} ymm4 = [127,127,127,127]
2714; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm5
2715; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [127,127]
2716; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
2717; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm6, %xmm7
2718; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm7, %ymm5
2719; AVX1-NEXT:    vblendvpd %ymm5, %ymm3, %ymm4, %ymm3
2720; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
2721; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
2722; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm6, %xmm7
2723; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm7, %ymm5
2724; AVX1-NEXT:    vblendvpd %ymm5, %ymm2, %ymm4, %ymm2
2725; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
2726; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
2727; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm6, %xmm7
2728; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm7, %ymm5
2729; AVX1-NEXT:    vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
2730; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
2731; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
2732; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm6, %xmm6
2733; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
2734; AVX1-NEXT:    vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
2735; AVX1-NEXT:    vmovapd {{.*#+}} ymm4 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
2736; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
2737; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [18446744073709551488,18446744073709551488]
2738; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm5, %xmm5
2739; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm0, %xmm7
2740; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm7, %ymm5
2741; AVX1-NEXT:    vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
2742; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
2743; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm5, %xmm5
2744; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm1, %xmm7
2745; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm7, %ymm5
2746; AVX1-NEXT:    vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
2747; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
2748; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm5, %xmm5
2749; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm2, %xmm7
2750; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm7, %ymm5
2751; AVX1-NEXT:    vblendvpd %ymm5, %ymm2, %ymm4, %ymm2
2752; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm5
2753; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm5, %xmm5
2754; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm3, %xmm6
2755; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
2756; AVX1-NEXT:    vblendvpd %ymm5, %ymm3, %ymm4, %ymm3
2757; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
2758; AVX1-NEXT:    vpackssdw %xmm4, %xmm3, %xmm3
2759; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
2760; AVX1-NEXT:    vpackssdw %xmm4, %xmm2, %xmm2
2761; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
2762; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
2763; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
2764; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2765; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
2766; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2767; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
2768; AVX1-NEXT:    vzeroupper
2769; AVX1-NEXT:    retq
2770;
2771; AVX2-LABEL: trunc_ssat_v16i64_v16i8:
2772; AVX2:       # %bb.0:
2773; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [127,127,127,127]
2774; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm4, %ymm5
2775; AVX2-NEXT:    vblendvpd %ymm5, %ymm2, %ymm4, %ymm2
2776; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm5
2777; AVX2-NEXT:    vblendvpd %ymm5, %ymm3, %ymm4, %ymm3
2778; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm4, %ymm5
2779; AVX2-NEXT:    vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
2780; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm4, %ymm5
2781; AVX2-NEXT:    vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
2782; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
2783; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm1, %ymm5
2784; AVX2-NEXT:    vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
2785; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm0, %ymm5
2786; AVX2-NEXT:    vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
2787; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
2788; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm3, %ymm1
2789; AVX2-NEXT:    vblendvpd %ymm1, %ymm3, %ymm4, %ymm1
2790; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm2, %ymm3
2791; AVX2-NEXT:    vblendvpd %ymm3, %ymm2, %ymm4, %ymm2
2792; AVX2-NEXT:    vpackssdw %ymm1, %ymm2, %ymm1
2793; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
2794; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2795; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
2796; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2797; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
2798; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
2799; AVX2-NEXT:    vzeroupper
2800; AVX2-NEXT:    retq
2801;
2802; AVX512-LABEL: trunc_ssat_v16i64_v16i8:
2803; AVX512:       # %bb.0:
2804; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [127,127,127,127,127,127,127,127]
2805; AVX512-NEXT:    vpminsq %zmm2, %zmm0, %zmm0
2806; AVX512-NEXT:    vpminsq %zmm2, %zmm1, %zmm1
2807; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
2808; AVX512-NEXT:    vpmaxsq %zmm2, %zmm1, %zmm1
2809; AVX512-NEXT:    vpmaxsq %zmm2, %zmm0, %zmm0
2810; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
2811; AVX512-NEXT:    vpmovqd %zmm1, %ymm1
2812; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
2813; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
2814; AVX512-NEXT:    vzeroupper
2815; AVX512-NEXT:    retq
2816  %1 = icmp slt <16 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
2817  %2 = select <16 x i1> %1, <16 x i64> %a0, <16 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
2818  %3 = icmp sgt <16 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
2819  %4 = select <16 x i1> %3, <16 x i64> %2, <16 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
2820  %5 = trunc <16 x i64> %4 to <16 x i8>
2821  ret <16 x i8> %5
2822}
2823
; Signed-saturating truncation <8 x i32> -> <8 x i8>: each lane is clamped to
; [-128, 127] with an icmp/select min (vs 127) followed by an icmp/select max
; (vs -128), then truncated. The CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py (see file header NOTE) -- regenerate with
; that script rather than editing them by hand.
2824define <8 x i8> @trunc_ssat_v8i32_v8i8(<8 x i32> %a0) {
2825; SSE2-LABEL: trunc_ssat_v8i32_v8i8:
2826; SSE2:       # %bb.0:
2827; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127]
2828; SSE2-NEXT:    movdqa %xmm2, %xmm3
2829; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
2830; SSE2-NEXT:    pand %xmm3, %xmm0
2831; SSE2-NEXT:    pandn %xmm2, %xmm3
2832; SSE2-NEXT:    por %xmm0, %xmm3
2833; SSE2-NEXT:    movdqa %xmm2, %xmm0
2834; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
2835; SSE2-NEXT:    pand %xmm0, %xmm1
2836; SSE2-NEXT:    pandn %xmm2, %xmm0
2837; SSE2-NEXT:    por %xmm1, %xmm0
2838; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
2839; SSE2-NEXT:    movdqa %xmm0, %xmm2
2840; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
2841; SSE2-NEXT:    pand %xmm2, %xmm0
2842; SSE2-NEXT:    pandn %xmm1, %xmm2
2843; SSE2-NEXT:    por %xmm0, %xmm2
2844; SSE2-NEXT:    movdqa %xmm3, %xmm0
2845; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
2846; SSE2-NEXT:    pand %xmm0, %xmm3
2847; SSE2-NEXT:    pandn %xmm1, %xmm0
2848; SSE2-NEXT:    por %xmm3, %xmm0
2849; SSE2-NEXT:    packssdw %xmm2, %xmm0
2850; SSE2-NEXT:    retq
2851;
2852; SSSE3-LABEL: trunc_ssat_v8i32_v8i8:
2853; SSSE3:       # %bb.0:
2854; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127]
2855; SSSE3-NEXT:    movdqa %xmm2, %xmm3
2856; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm3
2857; SSSE3-NEXT:    pand %xmm3, %xmm0
2858; SSSE3-NEXT:    pandn %xmm2, %xmm3
2859; SSSE3-NEXT:    por %xmm0, %xmm3
2860; SSSE3-NEXT:    movdqa %xmm2, %xmm0
2861; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm0
2862; SSSE3-NEXT:    pand %xmm0, %xmm1
2863; SSSE3-NEXT:    pandn %xmm2, %xmm0
2864; SSSE3-NEXT:    por %xmm1, %xmm0
2865; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
2866; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2867; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
2868; SSSE3-NEXT:    pand %xmm2, %xmm0
2869; SSSE3-NEXT:    pandn %xmm1, %xmm2
2870; SSSE3-NEXT:    por %xmm0, %xmm2
2871; SSSE3-NEXT:    movdqa %xmm3, %xmm0
2872; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm0
2873; SSSE3-NEXT:    pand %xmm0, %xmm3
2874; SSSE3-NEXT:    pandn %xmm1, %xmm0
2875; SSSE3-NEXT:    por %xmm3, %xmm0
2876; SSSE3-NEXT:    packssdw %xmm2, %xmm0
2877; SSSE3-NEXT:    retq
2878;
2879; SSE41-LABEL: trunc_ssat_v8i32_v8i8:
2880; SSE41:       # %bb.0:
2881; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127]
2882; SSE41-NEXT:    pminsd %xmm2, %xmm0
2883; SSE41-NEXT:    pminsd %xmm2, %xmm1
2884; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [4294967168,4294967168,4294967168,4294967168]
2885; SSE41-NEXT:    pmaxsd %xmm2, %xmm1
2886; SSE41-NEXT:    pmaxsd %xmm2, %xmm0
2887; SSE41-NEXT:    packssdw %xmm1, %xmm0
2888; SSE41-NEXT:    retq
2889;
2890; AVX1-LABEL: trunc_ssat_v8i32_v8i8:
2891; AVX1:       # %bb.0:
2892; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127]
2893; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm2
2894; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
2895; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
2896; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
2897; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
2898; AVX1-NEXT:    vpmaxsd %xmm1, %xmm2, %xmm1
2899; AVX1-NEXT:    vpackssdw %xmm0, %xmm1, %xmm0
2900; AVX1-NEXT:    vzeroupper
2901; AVX1-NEXT:    retq
2902;
2903; AVX2-LABEL: trunc_ssat_v8i32_v8i8:
2904; AVX2:       # %bb.0:
2905; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127]
2906; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
2907; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168]
2908; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
2909; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
2910; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2911; AVX2-NEXT:    vzeroupper
2912; AVX2-NEXT:    retq
2913;
2914; AVX512F-LABEL: trunc_ssat_v8i32_v8i8:
2915; AVX512F:       # %bb.0:
2916; AVX512F-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127]
2917; AVX512F-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
2918; AVX512F-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168]
2919; AVX512F-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
2920; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
2921; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
2922; AVX512F-NEXT:    vzeroupper
2923; AVX512F-NEXT:    retq
2924;
2925; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8:
2926; AVX512VL:       # %bb.0:
2927; AVX512VL-NEXT:    vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
2928; AVX512VL-NEXT:    vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
2929; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
2930; AVX512VL-NEXT:    vzeroupper
2931; AVX512VL-NEXT:    retq
2932;
2933; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8:
2934; AVX512BW:       # %bb.0:
2935; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127]
2936; AVX512BW-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
2937; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168]
2938; AVX512BW-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
2939; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
2940; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
2941; AVX512BW-NEXT:    vzeroupper
2942; AVX512BW-NEXT:    retq
2943;
2944; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8:
2945; AVX512BWVL:       # %bb.0:
2946; AVX512BWVL-NEXT:    vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
2947; AVX512BWVL-NEXT:    vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
2948; AVX512BWVL-NEXT:    vpmovdw %ymm0, %xmm0
2949; AVX512BWVL-NEXT:    vzeroupper
2950; AVX512BWVL-NEXT:    retq
2951  %1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
2952  %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
2953  %3 = icmp sgt <8 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
2954  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
2955  %5 = trunc <8 x i32> %4 to <8 x i8>
2956  ret <8 x i8> %5
2957}
2958
; Store variant of trunc_ssat_v8i32_v8i8: the same [-128, 127] clamp and
; trunc of <8 x i32> to <8 x i8>, but the result is stored through %p1 instead
; of returned -- this lets AVX512VL/AVX512BWVL fold the whole thing into a
; single truncating-store (vpmovsdb to memory). CHECK lines are autogenerated
; by utils/update_llc_test_checks.py; regenerate rather than hand-edit.
2959define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
2960; SSE-LABEL: trunc_ssat_v8i32_v8i8_store:
2961; SSE:       # %bb.0:
2962; SSE-NEXT:    packssdw %xmm1, %xmm0
2963; SSE-NEXT:    packsswb %xmm0, %xmm0
2964; SSE-NEXT:    movq %xmm0, (%rdi)
2965; SSE-NEXT:    retq
2966;
2967; AVX1-LABEL: trunc_ssat_v8i32_v8i8_store:
2968; AVX1:       # %bb.0:
2969; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2970; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2971; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2972; AVX1-NEXT:    vmovq %xmm0, (%rdi)
2973; AVX1-NEXT:    vzeroupper
2974; AVX1-NEXT:    retq
2975;
2976; AVX2-LABEL: trunc_ssat_v8i32_v8i8_store:
2977; AVX2:       # %bb.0:
2978; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
2979; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2980; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2981; AVX2-NEXT:    vmovq %xmm0, (%rdi)
2982; AVX2-NEXT:    vzeroupper
2983; AVX2-NEXT:    retq
2984;
2985; AVX512F-LABEL: trunc_ssat_v8i32_v8i8_store:
2986; AVX512F:       # %bb.0:
2987; AVX512F-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127]
2988; AVX512F-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
2989; AVX512F-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168]
2990; AVX512F-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
2991; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
2992; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2993; AVX512F-NEXT:    vmovq %xmm0, (%rdi)
2994; AVX512F-NEXT:    vzeroupper
2995; AVX512F-NEXT:    retq
2996;
2997; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8_store:
2998; AVX512VL:       # %bb.0:
2999; AVX512VL-NEXT:    vpmovsdb %ymm0, (%rdi)
3000; AVX512VL-NEXT:    vzeroupper
3001; AVX512VL-NEXT:    retq
3002;
3003; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8_store:
3004; AVX512BW:       # %bb.0:
3005; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127]
3006; AVX512BW-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
3007; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168]
3008; AVX512BW-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
3009; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
3010; AVX512BW-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
3011; AVX512BW-NEXT:    vmovq %xmm0, (%rdi)
3012; AVX512BW-NEXT:    vzeroupper
3013; AVX512BW-NEXT:    retq
3014;
3015; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8_store:
3016; AVX512BWVL:       # %bb.0:
3017; AVX512BWVL-NEXT:    vpmovsdb %ymm0, (%rdi)
3018; AVX512BWVL-NEXT:    vzeroupper
3019; AVX512BWVL-NEXT:    retq
3020  %1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
3021  %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
3022  %3 = icmp sgt <8 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
3023  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
3024  %5 = trunc <8 x i32> %4 to <8 x i8>
3025  store <8 x i8> %5, <8 x i8> *%p1
3026  ret void
3027}
3028
; Signed-saturating truncation <16 x i32> -> <16 x i8> (clamp to [-128, 127],
; then trunc). SSE/AVX lower this entirely to saturating packs
; (packssdw/packsswb); AVX512 uses a single vpmovsdb. CHECK lines are
; autogenerated by utils/update_llc_test_checks.py; regenerate rather than
; hand-edit.
3029define <16 x i8> @trunc_ssat_v16i32_v16i8(<16 x i32> %a0) {
3030; SSE-LABEL: trunc_ssat_v16i32_v16i8:
3031; SSE:       # %bb.0:
3032; SSE-NEXT:    packssdw %xmm3, %xmm2
3033; SSE-NEXT:    packssdw %xmm1, %xmm0
3034; SSE-NEXT:    packsswb %xmm2, %xmm0
3035; SSE-NEXT:    retq
3036;
3037; AVX1-LABEL: trunc_ssat_v16i32_v16i8:
3038; AVX1:       # %bb.0:
3039; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
3040; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
3041; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
3042; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
3043; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
3044; AVX1-NEXT:    vzeroupper
3045; AVX1-NEXT:    retq
3046;
3047; AVX2-LABEL: trunc_ssat_v16i32_v16i8:
3048; AVX2:       # %bb.0:
3049; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
3050; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3051; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
3052; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
3053; AVX2-NEXT:    vzeroupper
3054; AVX2-NEXT:    retq
3055;
3056; AVX512-LABEL: trunc_ssat_v16i32_v16i8:
3057; AVX512:       # %bb.0:
3058; AVX512-NEXT:    vpmovsdb %zmm0, %xmm0
3059; AVX512-NEXT:    vzeroupper
3060; AVX512-NEXT:    retq
3061  %1 = icmp slt <16 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
3062  %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
3063  %3 = icmp sgt <16 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
3064  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
3065  %5 = trunc <16 x i32> %4 to <16 x i8>
3066  ret <16 x i8> %5
3067}
3068
; Signed-saturating truncation <16 x i16> -> <16 x i8> (clamp to [-128, 127],
; then trunc). SSE/AVX lower this to a single packsswb step; AVX512BWVL
; matches it to vpmovswb. CHECK lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate rather than hand-edit.
3069define <16 x i8> @trunc_ssat_v16i16_v16i8(<16 x i16> %a0) {
3070; SSE-LABEL: trunc_ssat_v16i16_v16i8:
3071; SSE:       # %bb.0:
3072; SSE-NEXT:    packsswb %xmm1, %xmm0
3073; SSE-NEXT:    retq
3074;
3075; AVX1-LABEL: trunc_ssat_v16i16_v16i8:
3076; AVX1:       # %bb.0:
3077; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
3078; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
3079; AVX1-NEXT:    vzeroupper
3080; AVX1-NEXT:    retq
3081;
3082; AVX2-LABEL: trunc_ssat_v16i16_v16i8:
3083; AVX2:       # %bb.0:
3084; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
3085; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
3086; AVX2-NEXT:    vzeroupper
3087; AVX2-NEXT:    retq
3088;
3089; AVX512F-LABEL: trunc_ssat_v16i16_v16i8:
3090; AVX512F:       # %bb.0:
3091; AVX512F-NEXT:    vpminsw {{.*}}(%rip), %ymm0, %ymm0
3092; AVX512F-NEXT:    vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
3093; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
3094; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
3095; AVX512F-NEXT:    vzeroupper
3096; AVX512F-NEXT:    retq
3097;
3098; AVX512VL-LABEL: trunc_ssat_v16i16_v16i8:
3099; AVX512VL:       # %bb.0:
3100; AVX512VL-NEXT:    vpminsw {{.*}}(%rip), %ymm0, %ymm0
3101; AVX512VL-NEXT:    vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
3102; AVX512VL-NEXT:    vpmovsxwd %ymm0, %zmm0
3103; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
3104; AVX512VL-NEXT:    vzeroupper
3105; AVX512VL-NEXT:    retq
3106;
3107; AVX512BW-LABEL: trunc_ssat_v16i16_v16i8:
3108; AVX512BW:       # %bb.0:
3109; AVX512BW-NEXT:    vpminsw {{.*}}(%rip), %ymm0, %ymm0
3110; AVX512BW-NEXT:    vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
3111; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
3112; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
3113; AVX512BW-NEXT:    vzeroupper
3114; AVX512BW-NEXT:    retq
3115;
3116; AVX512BWVL-LABEL: trunc_ssat_v16i16_v16i8:
3117; AVX512BWVL:       # %bb.0:
3118; AVX512BWVL-NEXT:    vpmovswb %ymm0, %xmm0
3119; AVX512BWVL-NEXT:    vzeroupper
3120; AVX512BWVL-NEXT:    retq
3121  %1 = icmp slt <16 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
3122  %2 = select <16 x i1> %1, <16 x i16> %a0, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
3123  %3 = icmp sgt <16 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
3124  %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
3125  %5 = trunc <16 x i16> %4 to <16 x i8>
3126  ret <16 x i8> %5
3127}
3128
; Signed-saturating truncation <32 x i16> -> <32 x i8> (clamp to [-128, 127],
; then trunc). SSE/AVX lower to packsswb (AVX2 needs a vpermq lane fixup);
; AVX512BW/AVX512BWVL match a single vpmovswb from zmm to ymm. CHECK lines are
; autogenerated by utils/update_llc_test_checks.py; regenerate rather than
; hand-edit.
3129define <32 x i8> @trunc_ssat_v32i16_v32i8(<32 x i16> %a0) {
3130; SSE-LABEL: trunc_ssat_v32i16_v32i8:
3131; SSE:       # %bb.0:
3132; SSE-NEXT:    packsswb %xmm1, %xmm0
3133; SSE-NEXT:    packsswb %xmm3, %xmm2
3134; SSE-NEXT:    movdqa %xmm2, %xmm1
3135; SSE-NEXT:    retq
3136;
3137; AVX1-LABEL: trunc_ssat_v32i16_v32i8:
3138; AVX1:       # %bb.0:
3139; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
3140; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
3141; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
3142; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
3143; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3144; AVX1-NEXT:    retq
3145;
3146; AVX2-LABEL: trunc_ssat_v32i16_v32i8:
3147; AVX2:       # %bb.0:
3148; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
3149; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3150; AVX2-NEXT:    retq
3151;
3152; AVX512F-LABEL: trunc_ssat_v32i16_v32i8:
3153; AVX512F:       # %bb.0:
3154; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
3155; AVX512F-NEXT:    vpminsw %ymm2, %ymm0, %ymm0
3156; AVX512F-NEXT:    vpminsw %ymm2, %ymm1, %ymm1
3157; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408]
3158; AVX512F-NEXT:    vpmaxsw %ymm2, %ymm1, %ymm1
3159; AVX512F-NEXT:    vpmaxsw %ymm2, %ymm0, %ymm0
3160; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
3161; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
3162; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
3163; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
3164; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
3165; AVX512F-NEXT:    retq
3166;
3167; AVX512VL-LABEL: trunc_ssat_v32i16_v32i8:
3168; AVX512VL:       # %bb.0:
3169; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
3170; AVX512VL-NEXT:    vpminsw %ymm2, %ymm0, %ymm0
3171; AVX512VL-NEXT:    vpminsw %ymm2, %ymm1, %ymm1
3172; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408,65408]
3173; AVX512VL-NEXT:    vpmaxsw %ymm2, %ymm1, %ymm1
3174; AVX512VL-NEXT:    vpmaxsw %ymm2, %ymm0, %ymm0
3175; AVX512VL-NEXT:    vpmovsxwd %ymm0, %zmm0
3176; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
3177; AVX512VL-NEXT:    vpmovsxwd %ymm1, %zmm1
3178; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
3179; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
3180; AVX512VL-NEXT:    retq
3181;
3182; AVX512BW-LABEL: trunc_ssat_v32i16_v32i8:
3183; AVX512BW:       # %bb.0:
3184; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm0
3185; AVX512BW-NEXT:    retq
3186;
3187; AVX512BWVL-LABEL: trunc_ssat_v32i16_v32i8:
3188; AVX512BWVL:       # %bb.0:
3189; AVX512BWVL-NEXT:    vpmovswb %zmm0, %ymm0
3190; AVX512BWVL-NEXT:    retq
3191  %1 = icmp slt <32 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
3192  %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
3193  %3 = icmp sgt <32 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
3194  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
3195  %5 = trunc <32 x i16> %4 to <32 x i8>
3196  ret <32 x i8> %5
3197}
3198