• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=SKX
13
14;
15; Signed saturation truncation to vXi32
16;
17
18define <2 x i32> @trunc_ssat_v2i64_v2i32(<2 x i64> %a0) {
19; SSE2-LABEL: trunc_ssat_v2i64_v2i32:
20; SSE2:       # %bb.0:
21; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
22; SSE2-NEXT:    movdqa %xmm0, %xmm2
23; SSE2-NEXT:    pxor %xmm1, %xmm2
24; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
25; SSE2-NEXT:    movdqa %xmm3, %xmm4
26; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
27; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
28; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
29; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
30; SSE2-NEXT:    pand %xmm5, %xmm2
31; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
32; SSE2-NEXT:    por %xmm2, %xmm3
33; SSE2-NEXT:    pand %xmm3, %xmm0
34; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm3
35; SSE2-NEXT:    por %xmm0, %xmm3
36; SSE2-NEXT:    pxor %xmm3, %xmm1
37; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [18446744069414584320,18446744069414584320]
38; SSE2-NEXT:    movdqa %xmm1, %xmm2
39; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
40; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
41; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
42; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
43; SSE2-NEXT:    pand %xmm4, %xmm0
44; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
45; SSE2-NEXT:    por %xmm0, %xmm1
46; SSE2-NEXT:    pand %xmm1, %xmm3
47; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm1
48; SSE2-NEXT:    por %xmm3, %xmm1
49; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
50; SSE2-NEXT:    retq
51;
52; SSSE3-LABEL: trunc_ssat_v2i64_v2i32:
53; SSSE3:       # %bb.0:
54; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
55; SSSE3-NEXT:    movdqa %xmm0, %xmm2
56; SSSE3-NEXT:    pxor %xmm1, %xmm2
57; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
58; SSSE3-NEXT:    movdqa %xmm3, %xmm4
59; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm4
60; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
61; SSSE3-NEXT:    pcmpeqd %xmm3, %xmm2
62; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
63; SSSE3-NEXT:    pand %xmm5, %xmm2
64; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
65; SSSE3-NEXT:    por %xmm2, %xmm3
66; SSSE3-NEXT:    pand %xmm3, %xmm0
67; SSSE3-NEXT:    pandn {{.*}}(%rip), %xmm3
68; SSSE3-NEXT:    por %xmm0, %xmm3
69; SSSE3-NEXT:    pxor %xmm3, %xmm1
70; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [18446744069414584320,18446744069414584320]
71; SSSE3-NEXT:    movdqa %xmm1, %xmm2
72; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
73; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
74; SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
75; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
76; SSSE3-NEXT:    pand %xmm4, %xmm0
77; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
78; SSSE3-NEXT:    por %xmm0, %xmm1
79; SSSE3-NEXT:    pand %xmm1, %xmm3
80; SSSE3-NEXT:    pandn {{.*}}(%rip), %xmm1
81; SSSE3-NEXT:    por %xmm3, %xmm1
82; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
83; SSSE3-NEXT:    retq
84;
85; SSE41-LABEL: trunc_ssat_v2i64_v2i32:
86; SSE41:       # %bb.0:
87; SSE41-NEXT:    movdqa %xmm0, %xmm1
88; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [2147483647,2147483647]
89; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
90; SSE41-NEXT:    pxor %xmm3, %xmm0
91; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [4294967295,4294967295]
92; SSE41-NEXT:    movdqa %xmm4, %xmm5
93; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
94; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
95; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
96; SSE41-NEXT:    pand %xmm5, %xmm0
97; SSE41-NEXT:    por %xmm4, %xmm0
98; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
99; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
100; SSE41-NEXT:    pxor %xmm2, %xmm3
101; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [18446744069414584320,18446744069414584320]
102; SSE41-NEXT:    movdqa %xmm3, %xmm4
103; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
104; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
105; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
106; SSE41-NEXT:    pand %xmm4, %xmm0
107; SSE41-NEXT:    por %xmm3, %xmm0
108; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
109; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
110; SSE41-NEXT:    retq
111;
112; AVX-LABEL: trunc_ssat_v2i64_v2i32:
113; AVX:       # %bb.0:
114; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647]
115; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
116; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
117; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
118; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
119; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
120; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
121; AVX-NEXT:    retq
122;
123; AVX512F-LABEL: trunc_ssat_v2i64_v2i32:
124; AVX512F:       # %bb.0:
125; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
126; AVX512F-NEXT:    vpmovsqd %zmm0, %ymm0
127; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
128; AVX512F-NEXT:    vzeroupper
129; AVX512F-NEXT:    retq
130;
131; AVX512VL-LABEL: trunc_ssat_v2i64_v2i32:
132; AVX512VL:       # %bb.0:
133; AVX512VL-NEXT:    vpmovsqd %xmm0, %xmm0
134; AVX512VL-NEXT:    retq
135;
136; AVX512BW-LABEL: trunc_ssat_v2i64_v2i32:
137; AVX512BW:       # %bb.0:
138; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
139; AVX512BW-NEXT:    vpmovsqd %zmm0, %ymm0
140; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
141; AVX512BW-NEXT:    vzeroupper
142; AVX512BW-NEXT:    retq
143;
144; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i32:
145; AVX512BWVL:       # %bb.0:
146; AVX512BWVL-NEXT:    vpmovsqd %xmm0, %xmm0
147; AVX512BWVL-NEXT:    retq
148;
149; SKX-LABEL: trunc_ssat_v2i64_v2i32:
150; SKX:       # %bb.0:
151; SKX-NEXT:    vpmovsqd %xmm0, %xmm0
152; SKX-NEXT:    retq
153  %1 = icmp slt <2 x i64> %a0, <i64 2147483647, i64 2147483647>
154  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 2147483647, i64 2147483647>
155  %3 = icmp sgt <2 x i64> %2, <i64 -2147483648, i64 -2147483648>
156  %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -2147483648, i64 -2147483648>
157  %5 = trunc <2 x i64> %4 to <2 x i32>
158  ret <2 x i32> %5
159}
160
161define void @trunc_ssat_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) {
162; SSE2-LABEL: trunc_ssat_v2i64_v2i32_store:
163; SSE2:       # %bb.0:
164; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
165; SSE2-NEXT:    movdqa %xmm0, %xmm2
166; SSE2-NEXT:    pxor %xmm1, %xmm2
167; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
168; SSE2-NEXT:    movdqa %xmm3, %xmm4
169; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
170; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
171; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
172; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
173; SSE2-NEXT:    pand %xmm5, %xmm2
174; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
175; SSE2-NEXT:    por %xmm2, %xmm3
176; SSE2-NEXT:    pand %xmm3, %xmm0
177; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm3
178; SSE2-NEXT:    por %xmm0, %xmm3
179; SSE2-NEXT:    pxor %xmm3, %xmm1
180; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [18446744069414584320,18446744069414584320]
181; SSE2-NEXT:    movdqa %xmm1, %xmm2
182; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
183; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
184; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
185; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
186; SSE2-NEXT:    pand %xmm4, %xmm0
187; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
188; SSE2-NEXT:    por %xmm0, %xmm1
189; SSE2-NEXT:    pand %xmm1, %xmm3
190; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm1
191; SSE2-NEXT:    por %xmm3, %xmm1
192; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
193; SSE2-NEXT:    movq %xmm0, (%rdi)
194; SSE2-NEXT:    retq
195;
196; SSSE3-LABEL: trunc_ssat_v2i64_v2i32_store:
197; SSSE3:       # %bb.0:
198; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
199; SSSE3-NEXT:    movdqa %xmm0, %xmm2
200; SSSE3-NEXT:    pxor %xmm1, %xmm2
201; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
202; SSSE3-NEXT:    movdqa %xmm3, %xmm4
203; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm4
204; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
205; SSSE3-NEXT:    pcmpeqd %xmm3, %xmm2
206; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
207; SSSE3-NEXT:    pand %xmm5, %xmm2
208; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
209; SSSE3-NEXT:    por %xmm2, %xmm3
210; SSSE3-NEXT:    pand %xmm3, %xmm0
211; SSSE3-NEXT:    pandn {{.*}}(%rip), %xmm3
212; SSSE3-NEXT:    por %xmm0, %xmm3
213; SSSE3-NEXT:    pxor %xmm3, %xmm1
214; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [18446744069414584320,18446744069414584320]
215; SSSE3-NEXT:    movdqa %xmm1, %xmm2
216; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
217; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
218; SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
219; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
220; SSSE3-NEXT:    pand %xmm4, %xmm0
221; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
222; SSSE3-NEXT:    por %xmm0, %xmm1
223; SSSE3-NEXT:    pand %xmm1, %xmm3
224; SSSE3-NEXT:    pandn {{.*}}(%rip), %xmm1
225; SSSE3-NEXT:    por %xmm3, %xmm1
226; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
227; SSSE3-NEXT:    movq %xmm0, (%rdi)
228; SSSE3-NEXT:    retq
229;
230; SSE41-LABEL: trunc_ssat_v2i64_v2i32_store:
231; SSE41:       # %bb.0:
232; SSE41-NEXT:    movdqa %xmm0, %xmm1
233; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [2147483647,2147483647]
234; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
235; SSE41-NEXT:    pxor %xmm3, %xmm0
236; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [4294967295,4294967295]
237; SSE41-NEXT:    movdqa %xmm4, %xmm5
238; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
239; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
240; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
241; SSE41-NEXT:    pand %xmm5, %xmm0
242; SSE41-NEXT:    por %xmm4, %xmm0
243; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
244; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
245; SSE41-NEXT:    pxor %xmm2, %xmm3
246; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [18446744069414584320,18446744069414584320]
247; SSE41-NEXT:    movdqa %xmm3, %xmm4
248; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
249; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
250; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
251; SSE41-NEXT:    pand %xmm4, %xmm0
252; SSE41-NEXT:    por %xmm3, %xmm0
253; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
254; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
255; SSE41-NEXT:    movq %xmm0, (%rdi)
256; SSE41-NEXT:    retq
257;
258; AVX-LABEL: trunc_ssat_v2i64_v2i32_store:
259; AVX:       # %bb.0:
260; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647]
261; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
262; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
263; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
264; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
265; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
266; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
267; AVX-NEXT:    vmovlpd %xmm0, (%rdi)
268; AVX-NEXT:    retq
269;
270; AVX512F-LABEL: trunc_ssat_v2i64_v2i32_store:
271; AVX512F:       # %bb.0:
272; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
273; AVX512F-NEXT:    vpmovsqd %zmm0, %ymm0
274; AVX512F-NEXT:    vmovq %xmm0, (%rdi)
275; AVX512F-NEXT:    vzeroupper
276; AVX512F-NEXT:    retq
277;
278; AVX512VL-LABEL: trunc_ssat_v2i64_v2i32_store:
279; AVX512VL:       # %bb.0:
280; AVX512VL-NEXT:    vpmovsqd %xmm0, (%rdi)
281; AVX512VL-NEXT:    retq
282;
283; AVX512BW-LABEL: trunc_ssat_v2i64_v2i32_store:
284; AVX512BW:       # %bb.0:
285; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
286; AVX512BW-NEXT:    vpmovsqd %zmm0, %ymm0
287; AVX512BW-NEXT:    vmovq %xmm0, (%rdi)
288; AVX512BW-NEXT:    vzeroupper
289; AVX512BW-NEXT:    retq
290;
291; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i32_store:
292; AVX512BWVL:       # %bb.0:
293; AVX512BWVL-NEXT:    vpmovsqd %xmm0, (%rdi)
294; AVX512BWVL-NEXT:    retq
295;
296; SKX-LABEL: trunc_ssat_v2i64_v2i32_store:
297; SKX:       # %bb.0:
298; SKX-NEXT:    vpmovsqd %xmm0, (%rdi)
299; SKX-NEXT:    retq
300  %1 = icmp slt <2 x i64> %a0, <i64 2147483647, i64 2147483647>
301  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 2147483647, i64 2147483647>
302  %3 = icmp sgt <2 x i64> %2, <i64 -2147483648, i64 -2147483648>
303  %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -2147483648, i64 -2147483648>
304  %5 = trunc <2 x i64> %4 to <2 x i32>
305  store <2 x i32> %5, <2 x i32>* %p1
306  ret void
307}
308
309define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) {
310; SSE2-LABEL: trunc_ssat_v4i64_v4i32:
311; SSE2:       # %bb.0:
312; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [2147483647,2147483647]
313; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
314; SSE2-NEXT:    movdqa %xmm0, %xmm3
315; SSE2-NEXT:    pxor %xmm2, %xmm3
316; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [4294967295,4294967295]
317; SSE2-NEXT:    movdqa %xmm5, %xmm6
318; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
319; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
320; SSE2-NEXT:    pcmpeqd %xmm5, %xmm3
321; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
322; SSE2-NEXT:    pand %xmm7, %xmm4
323; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
324; SSE2-NEXT:    por %xmm4, %xmm3
325; SSE2-NEXT:    pand %xmm3, %xmm0
326; SSE2-NEXT:    pandn %xmm8, %xmm3
327; SSE2-NEXT:    por %xmm0, %xmm3
328; SSE2-NEXT:    movdqa %xmm1, %xmm0
329; SSE2-NEXT:    pxor %xmm2, %xmm0
330; SSE2-NEXT:    movdqa %xmm5, %xmm4
331; SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
332; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
333; SSE2-NEXT:    pcmpeqd %xmm5, %xmm0
334; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
335; SSE2-NEXT:    pand %xmm6, %xmm0
336; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
337; SSE2-NEXT:    por %xmm0, %xmm4
338; SSE2-NEXT:    pand %xmm4, %xmm1
339; SSE2-NEXT:    pandn %xmm8, %xmm4
340; SSE2-NEXT:    por %xmm1, %xmm4
341; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
342; SSE2-NEXT:    movdqa %xmm4, %xmm0
343; SSE2-NEXT:    pxor %xmm2, %xmm0
344; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [18446744069414584320,18446744069414584320]
345; SSE2-NEXT:    movdqa %xmm0, %xmm6
346; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
347; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
348; SSE2-NEXT:    pcmpeqd %xmm5, %xmm0
349; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
350; SSE2-NEXT:    pand %xmm7, %xmm0
351; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
352; SSE2-NEXT:    por %xmm0, %xmm6
353; SSE2-NEXT:    pand %xmm6, %xmm4
354; SSE2-NEXT:    pandn %xmm1, %xmm6
355; SSE2-NEXT:    por %xmm4, %xmm6
356; SSE2-NEXT:    pxor %xmm3, %xmm2
357; SSE2-NEXT:    movdqa %xmm2, %xmm0
358; SSE2-NEXT:    pcmpgtd %xmm5, %xmm0
359; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
360; SSE2-NEXT:    pcmpeqd %xmm5, %xmm2
361; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
362; SSE2-NEXT:    pand %xmm4, %xmm2
363; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
364; SSE2-NEXT:    por %xmm2, %xmm0
365; SSE2-NEXT:    pand %xmm0, %xmm3
366; SSE2-NEXT:    pandn %xmm1, %xmm0
367; SSE2-NEXT:    por %xmm3, %xmm0
368; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm6[0,2]
369; SSE2-NEXT:    retq
370;
371; SSSE3-LABEL: trunc_ssat_v4i64_v4i32:
372; SSSE3:       # %bb.0:
373; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [2147483647,2147483647]
374; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
375; SSSE3-NEXT:    movdqa %xmm0, %xmm3
376; SSSE3-NEXT:    pxor %xmm2, %xmm3
377; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [4294967295,4294967295]
378; SSSE3-NEXT:    movdqa %xmm5, %xmm6
379; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm6
380; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
381; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm3
382; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
383; SSSE3-NEXT:    pand %xmm7, %xmm4
384; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
385; SSSE3-NEXT:    por %xmm4, %xmm3
386; SSSE3-NEXT:    pand %xmm3, %xmm0
387; SSSE3-NEXT:    pandn %xmm8, %xmm3
388; SSSE3-NEXT:    por %xmm0, %xmm3
389; SSSE3-NEXT:    movdqa %xmm1, %xmm0
390; SSSE3-NEXT:    pxor %xmm2, %xmm0
391; SSSE3-NEXT:    movdqa %xmm5, %xmm4
392; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm4
393; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
394; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm0
395; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
396; SSSE3-NEXT:    pand %xmm6, %xmm0
397; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
398; SSSE3-NEXT:    por %xmm0, %xmm4
399; SSSE3-NEXT:    pand %xmm4, %xmm1
400; SSSE3-NEXT:    pandn %xmm8, %xmm4
401; SSSE3-NEXT:    por %xmm1, %xmm4
402; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
403; SSSE3-NEXT:    movdqa %xmm4, %xmm0
404; SSSE3-NEXT:    pxor %xmm2, %xmm0
405; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [18446744069414584320,18446744069414584320]
406; SSSE3-NEXT:    movdqa %xmm0, %xmm6
407; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm6
408; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
409; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm0
410; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
411; SSSE3-NEXT:    pand %xmm7, %xmm0
412; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
413; SSSE3-NEXT:    por %xmm0, %xmm6
414; SSSE3-NEXT:    pand %xmm6, %xmm4
415; SSSE3-NEXT:    pandn %xmm1, %xmm6
416; SSSE3-NEXT:    por %xmm4, %xmm6
417; SSSE3-NEXT:    pxor %xmm3, %xmm2
418; SSSE3-NEXT:    movdqa %xmm2, %xmm0
419; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm0
420; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
421; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm2
422; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
423; SSSE3-NEXT:    pand %xmm4, %xmm2
424; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
425; SSSE3-NEXT:    por %xmm2, %xmm0
426; SSSE3-NEXT:    pand %xmm0, %xmm3
427; SSSE3-NEXT:    pandn %xmm1, %xmm0
428; SSSE3-NEXT:    por %xmm3, %xmm0
429; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm6[0,2]
430; SSSE3-NEXT:    retq
431;
432; SSE41-LABEL: trunc_ssat_v4i64_v4i32:
433; SSE41:       # %bb.0:
434; SSE41-NEXT:    movdqa %xmm0, %xmm2
435; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [2147483647,2147483647]
436; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
437; SSE41-NEXT:    pxor %xmm3, %xmm0
438; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [4294967295,4294967295]
439; SSE41-NEXT:    movdqa %xmm6, %xmm5
440; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
441; SSE41-NEXT:    movdqa %xmm6, %xmm7
442; SSE41-NEXT:    pcmpgtd %xmm0, %xmm7
443; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
444; SSE41-NEXT:    pand %xmm5, %xmm0
445; SSE41-NEXT:    por %xmm7, %xmm0
446; SSE41-NEXT:    movapd %xmm4, %xmm5
447; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm5
448; SSE41-NEXT:    movdqa %xmm1, %xmm0
449; SSE41-NEXT:    pxor %xmm3, %xmm0
450; SSE41-NEXT:    movdqa %xmm6, %xmm2
451; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
452; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
453; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
454; SSE41-NEXT:    pand %xmm2, %xmm0
455; SSE41-NEXT:    por %xmm6, %xmm0
456; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm4
457; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
458; SSE41-NEXT:    movapd %xmm4, %xmm2
459; SSE41-NEXT:    xorpd %xmm3, %xmm2
460; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744069414584320,18446744069414584320]
461; SSE41-NEXT:    movapd %xmm2, %xmm7
462; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
463; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
464; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
465; SSE41-NEXT:    pand %xmm7, %xmm0
466; SSE41-NEXT:    por %xmm2, %xmm0
467; SSE41-NEXT:    movapd %xmm1, %xmm2
468; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
469; SSE41-NEXT:    xorpd %xmm5, %xmm3
470; SSE41-NEXT:    movapd %xmm3, %xmm4
471; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
472; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
473; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
474; SSE41-NEXT:    pand %xmm4, %xmm0
475; SSE41-NEXT:    por %xmm3, %xmm0
476; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm1
477; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
478; SSE41-NEXT:    movaps %xmm1, %xmm0
479; SSE41-NEXT:    retq
480;
481; AVX1-LABEL: trunc_ssat_v4i64_v4i32:
482; AVX1:       # %bb.0:
483; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
484; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [2147483647,2147483647]
485; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm3
486; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm4
487; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm2, %xmm0
488; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744071562067968,18446744071562067968]
489; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
490; AVX1-NEXT:    vblendvpd %xmm3, %xmm1, %xmm2, %xmm1
491; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm2
492; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm4, %xmm1
493; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
494; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
495; AVX1-NEXT:    vzeroupper
496; AVX1-NEXT:    retq
497;
498; AVX2-SLOW-LABEL: trunc_ssat_v4i64_v4i32:
499; AVX2-SLOW:       # %bb.0:
500; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
501; AVX2-SLOW-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
502; AVX2-SLOW-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
503; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
504; AVX2-SLOW-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
505; AVX2-SLOW-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
506; AVX2-SLOW-NEXT:    vextractf128 $1, %ymm0, %xmm1
507; AVX2-SLOW-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
508; AVX2-SLOW-NEXT:    vzeroupper
509; AVX2-SLOW-NEXT:    retq
510;
511; AVX2-FAST-LABEL: trunc_ssat_v4i64_v4i32:
512; AVX2-FAST:       # %bb.0:
513; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
514; AVX2-FAST-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
515; AVX2-FAST-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
516; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
517; AVX2-FAST-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
518; AVX2-FAST-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
519; AVX2-FAST-NEXT:    vmovapd {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
520; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
521; AVX2-FAST-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
522; AVX2-FAST-NEXT:    vzeroupper
523; AVX2-FAST-NEXT:    retq
524;
525; AVX512F-LABEL: trunc_ssat_v4i64_v4i32:
526; AVX512F:       # %bb.0:
527; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
528; AVX512F-NEXT:    vpmovsqd %zmm0, %ymm0
529; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
530; AVX512F-NEXT:    vzeroupper
531; AVX512F-NEXT:    retq
532;
533; AVX512VL-LABEL: trunc_ssat_v4i64_v4i32:
534; AVX512VL:       # %bb.0:
535; AVX512VL-NEXT:    vpmovsqd %ymm0, %xmm0
536; AVX512VL-NEXT:    vzeroupper
537; AVX512VL-NEXT:    retq
538;
539; AVX512BW-LABEL: trunc_ssat_v4i64_v4i32:
540; AVX512BW:       # %bb.0:
541; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
542; AVX512BW-NEXT:    vpmovsqd %zmm0, %ymm0
543; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
544; AVX512BW-NEXT:    vzeroupper
545; AVX512BW-NEXT:    retq
546;
547; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i32:
548; AVX512BWVL:       # %bb.0:
549; AVX512BWVL-NEXT:    vpmovsqd %ymm0, %xmm0
550; AVX512BWVL-NEXT:    vzeroupper
551; AVX512BWVL-NEXT:    retq
552;
553; SKX-LABEL: trunc_ssat_v4i64_v4i32:
554; SKX:       # %bb.0:
555; SKX-NEXT:    vpmovsqd %ymm0, %xmm0
556; SKX-NEXT:    vzeroupper
557; SKX-NEXT:    retq
558  %1 = icmp slt <4 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
559  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
560  %3 = icmp sgt <4 x i64> %2, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
561  %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
562  %5 = trunc <4 x i64> %4 to <4 x i32>
563  ret <4 x i32> %5
564}
565
566
567define <8 x i32> @trunc_ssat_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-width"="256" {
568; SSE2-LABEL: trunc_ssat_v8i64_v8i32:
569; SSE2:       # %bb.0:
570; SSE2-NEXT:    movdqa (%rdi), %xmm3
571; SSE2-NEXT:    movdqa 16(%rdi), %xmm5
572; SSE2-NEXT:    movdqa 32(%rdi), %xmm7
573; SSE2-NEXT:    movdqa 48(%rdi), %xmm9
574; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [2147483647,2147483647]
575; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
576; SSE2-NEXT:    movdqa %xmm3, %xmm2
577; SSE2-NEXT:    pxor %xmm0, %xmm2
578; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [4294967295,4294967295]
579; SSE2-NEXT:    movdqa %xmm10, %xmm6
580; SSE2-NEXT:    pcmpgtd %xmm2, %xmm6
581; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2]
582; SSE2-NEXT:    pcmpeqd %xmm10, %xmm2
583; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
584; SSE2-NEXT:    pand %xmm1, %xmm4
585; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
586; SSE2-NEXT:    por %xmm4, %xmm2
587; SSE2-NEXT:    pand %xmm2, %xmm3
588; SSE2-NEXT:    pandn %xmm8, %xmm2
589; SSE2-NEXT:    por %xmm3, %xmm2
590; SSE2-NEXT:    movdqa %xmm5, %xmm1
591; SSE2-NEXT:    pxor %xmm0, %xmm1
592; SSE2-NEXT:    movdqa %xmm10, %xmm3
593; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
594; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
595; SSE2-NEXT:    pcmpeqd %xmm10, %xmm1
596; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
597; SSE2-NEXT:    pand %xmm4, %xmm1
598; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
599; SSE2-NEXT:    por %xmm1, %xmm3
600; SSE2-NEXT:    pand %xmm3, %xmm5
601; SSE2-NEXT:    pandn %xmm8, %xmm3
602; SSE2-NEXT:    por %xmm5, %xmm3
603; SSE2-NEXT:    movdqa %xmm7, %xmm1
604; SSE2-NEXT:    pxor %xmm0, %xmm1
605; SSE2-NEXT:    movdqa %xmm10, %xmm4
606; SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
607; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
608; SSE2-NEXT:    pcmpeqd %xmm10, %xmm1
609; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
610; SSE2-NEXT:    pand %xmm5, %xmm1
611; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
612; SSE2-NEXT:    por %xmm1, %xmm5
613; SSE2-NEXT:    pand %xmm5, %xmm7
614; SSE2-NEXT:    pandn %xmm8, %xmm5
615; SSE2-NEXT:    por %xmm7, %xmm5
616; SSE2-NEXT:    movdqa %xmm9, %xmm1
617; SSE2-NEXT:    pxor %xmm0, %xmm1
618; SSE2-NEXT:    movdqa %xmm10, %xmm4
619; SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
620; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
621; SSE2-NEXT:    pcmpeqd %xmm10, %xmm1
622; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
623; SSE2-NEXT:    pand %xmm6, %xmm1
624; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3]
625; SSE2-NEXT:    por %xmm1, %xmm7
626; SSE2-NEXT:    pand %xmm7, %xmm9
627; SSE2-NEXT:    pandn %xmm8, %xmm7
628; SSE2-NEXT:    por %xmm9, %xmm7
629; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [18446744071562067968,18446744071562067968]
630; SSE2-NEXT:    movdqa %xmm7, %xmm1
631; SSE2-NEXT:    pxor %xmm0, %xmm1
632; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [18446744069414584320,18446744069414584320]
633; SSE2-NEXT:    movdqa %xmm1, %xmm4
634; SSE2-NEXT:    pcmpgtd %xmm9, %xmm4
635; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
636; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
637; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
638; SSE2-NEXT:    pand %xmm6, %xmm1
639; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
640; SSE2-NEXT:    por %xmm1, %xmm4
641; SSE2-NEXT:    pand %xmm4, %xmm7
642; SSE2-NEXT:    pandn %xmm8, %xmm4
643; SSE2-NEXT:    por %xmm7, %xmm4
644; SSE2-NEXT:    movdqa %xmm5, %xmm1
645; SSE2-NEXT:    pxor %xmm0, %xmm1
646; SSE2-NEXT:    movdqa %xmm1, %xmm6
647; SSE2-NEXT:    pcmpgtd %xmm9, %xmm6
648; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
649; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
650; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
651; SSE2-NEXT:    pand %xmm10, %xmm7
652; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
653; SSE2-NEXT:    por %xmm7, %xmm1
654; SSE2-NEXT:    pand %xmm1, %xmm5
655; SSE2-NEXT:    pandn %xmm8, %xmm1
656; SSE2-NEXT:    por %xmm5, %xmm1
657; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm4[0,2]
658; SSE2-NEXT:    movdqa %xmm3, %xmm4
659; SSE2-NEXT:    pxor %xmm0, %xmm4
660; SSE2-NEXT:    movdqa %xmm4, %xmm5
661; SSE2-NEXT:    pcmpgtd %xmm9, %xmm5
662; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
663; SSE2-NEXT:    pcmpeqd %xmm9, %xmm4
664; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
665; SSE2-NEXT:    pand %xmm6, %xmm4
666; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
667; SSE2-NEXT:    por %xmm4, %xmm5
668; SSE2-NEXT:    pand %xmm5, %xmm3
669; SSE2-NEXT:    pandn %xmm8, %xmm5
670; SSE2-NEXT:    por %xmm3, %xmm5
671; SSE2-NEXT:    pxor %xmm2, %xmm0
672; SSE2-NEXT:    movdqa %xmm0, %xmm3
673; SSE2-NEXT:    pcmpgtd %xmm9, %xmm3
674; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
675; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
676; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
677; SSE2-NEXT:    pand %xmm4, %xmm6
678; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
679; SSE2-NEXT:    por %xmm6, %xmm0
680; SSE2-NEXT:    pand %xmm0, %xmm2
681; SSE2-NEXT:    pandn %xmm8, %xmm0
682; SSE2-NEXT:    por %xmm2, %xmm0
683; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2]
684; SSE2-NEXT:    retq
685;
686; SSSE3-LABEL: trunc_ssat_v8i64_v8i32:
687; SSSE3:       # %bb.0:
688; SSSE3-NEXT:    movdqa (%rdi), %xmm3
689; SSSE3-NEXT:    movdqa 16(%rdi), %xmm5
690; SSSE3-NEXT:    movdqa 32(%rdi), %xmm7
691; SSSE3-NEXT:    movdqa 48(%rdi), %xmm9
692; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [2147483647,2147483647]
693; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
694; SSSE3-NEXT:    movdqa %xmm3, %xmm2
695; SSSE3-NEXT:    pxor %xmm0, %xmm2
696; SSSE3-NEXT:    movdqa {{.*#+}} xmm10 = [4294967295,4294967295]
697; SSSE3-NEXT:    movdqa %xmm10, %xmm6
698; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm6
699; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2]
700; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm2
701; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
702; SSSE3-NEXT:    pand %xmm1, %xmm4
703; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
704; SSSE3-NEXT:    por %xmm4, %xmm2
705; SSSE3-NEXT:    pand %xmm2, %xmm3
706; SSSE3-NEXT:    pandn %xmm8, %xmm2
707; SSSE3-NEXT:    por %xmm3, %xmm2
708; SSSE3-NEXT:    movdqa %xmm5, %xmm1
709; SSSE3-NEXT:    pxor %xmm0, %xmm1
710; SSSE3-NEXT:    movdqa %xmm10, %xmm3
711; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm3
712; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
713; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm1
714; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
715; SSSE3-NEXT:    pand %xmm4, %xmm1
716; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
717; SSSE3-NEXT:    por %xmm1, %xmm3
718; SSSE3-NEXT:    pand %xmm3, %xmm5
719; SSSE3-NEXT:    pandn %xmm8, %xmm3
720; SSSE3-NEXT:    por %xmm5, %xmm3
721; SSSE3-NEXT:    movdqa %xmm7, %xmm1
722; SSSE3-NEXT:    pxor %xmm0, %xmm1
723; SSSE3-NEXT:    movdqa %xmm10, %xmm4
724; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm4
725; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
726; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm1
727; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
728; SSSE3-NEXT:    pand %xmm5, %xmm1
729; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
730; SSSE3-NEXT:    por %xmm1, %xmm5
731; SSSE3-NEXT:    pand %xmm5, %xmm7
732; SSSE3-NEXT:    pandn %xmm8, %xmm5
733; SSSE3-NEXT:    por %xmm7, %xmm5
734; SSSE3-NEXT:    movdqa %xmm9, %xmm1
735; SSSE3-NEXT:    pxor %xmm0, %xmm1
736; SSSE3-NEXT:    movdqa %xmm10, %xmm4
737; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm4
738; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
739; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm1
740; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
741; SSSE3-NEXT:    pand %xmm6, %xmm1
742; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3]
743; SSSE3-NEXT:    por %xmm1, %xmm7
744; SSSE3-NEXT:    pand %xmm7, %xmm9
745; SSSE3-NEXT:    pandn %xmm8, %xmm7
746; SSSE3-NEXT:    por %xmm9, %xmm7
747; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [18446744071562067968,18446744071562067968]
748; SSSE3-NEXT:    movdqa %xmm7, %xmm1
749; SSSE3-NEXT:    pxor %xmm0, %xmm1
750; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [18446744069414584320,18446744069414584320]
751; SSSE3-NEXT:    movdqa %xmm1, %xmm4
752; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm4
753; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
754; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
755; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
756; SSSE3-NEXT:    pand %xmm6, %xmm1
757; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
758; SSSE3-NEXT:    por %xmm1, %xmm4
759; SSSE3-NEXT:    pand %xmm4, %xmm7
760; SSSE3-NEXT:    pandn %xmm8, %xmm4
761; SSSE3-NEXT:    por %xmm7, %xmm4
762; SSSE3-NEXT:    movdqa %xmm5, %xmm1
763; SSSE3-NEXT:    pxor %xmm0, %xmm1
764; SSSE3-NEXT:    movdqa %xmm1, %xmm6
765; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm6
766; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
767; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
768; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
769; SSSE3-NEXT:    pand %xmm10, %xmm7
770; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
771; SSSE3-NEXT:    por %xmm7, %xmm1
772; SSSE3-NEXT:    pand %xmm1, %xmm5
773; SSSE3-NEXT:    pandn %xmm8, %xmm1
774; SSSE3-NEXT:    por %xmm5, %xmm1
775; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm4[0,2]
776; SSSE3-NEXT:    movdqa %xmm3, %xmm4
777; SSSE3-NEXT:    pxor %xmm0, %xmm4
778; SSSE3-NEXT:    movdqa %xmm4, %xmm5
779; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm5
780; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
781; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm4
782; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
783; SSSE3-NEXT:    pand %xmm6, %xmm4
784; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
785; SSSE3-NEXT:    por %xmm4, %xmm5
786; SSSE3-NEXT:    pand %xmm5, %xmm3
787; SSSE3-NEXT:    pandn %xmm8, %xmm5
788; SSSE3-NEXT:    por %xmm3, %xmm5
789; SSSE3-NEXT:    pxor %xmm2, %xmm0
790; SSSE3-NEXT:    movdqa %xmm0, %xmm3
791; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm3
792; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
793; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
794; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
795; SSSE3-NEXT:    pand %xmm4, %xmm6
796; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
797; SSSE3-NEXT:    por %xmm6, %xmm0
798; SSSE3-NEXT:    pand %xmm0, %xmm2
799; SSSE3-NEXT:    pandn %xmm8, %xmm0
800; SSSE3-NEXT:    por %xmm2, %xmm0
801; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2]
802; SSSE3-NEXT:    retq
803;
804; SSE41-LABEL: trunc_ssat_v8i64_v8i32:
805; SSE41:       # %bb.0:
806; SSE41-NEXT:    movdqa (%rdi), %xmm5
807; SSE41-NEXT:    movdqa 16(%rdi), %xmm4
808; SSE41-NEXT:    movdqa 32(%rdi), %xmm10
809; SSE41-NEXT:    movdqa 48(%rdi), %xmm9
810; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [2147483647,2147483647]
811; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
812; SSE41-NEXT:    movdqa %xmm5, %xmm0
813; SSE41-NEXT:    pxor %xmm3, %xmm0
814; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
815; SSE41-NEXT:    movdqa %xmm2, %xmm7
816; SSE41-NEXT:    pcmpeqd %xmm0, %xmm7
817; SSE41-NEXT:    movdqa %xmm2, %xmm6
818; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
819; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
820; SSE41-NEXT:    pand %xmm7, %xmm0
821; SSE41-NEXT:    por %xmm6, %xmm0
822; SSE41-NEXT:    movapd %xmm1, %xmm8
823; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm8
824; SSE41-NEXT:    movdqa %xmm4, %xmm0
825; SSE41-NEXT:    pxor %xmm3, %xmm0
826; SSE41-NEXT:    movdqa %xmm2, %xmm5
827; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
828; SSE41-NEXT:    movdqa %xmm2, %xmm6
829; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
830; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
831; SSE41-NEXT:    pand %xmm5, %xmm0
832; SSE41-NEXT:    por %xmm6, %xmm0
833; SSE41-NEXT:    movapd %xmm1, %xmm11
834; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm11
835; SSE41-NEXT:    movdqa %xmm10, %xmm0
836; SSE41-NEXT:    pxor %xmm3, %xmm0
837; SSE41-NEXT:    movdqa %xmm2, %xmm4
838; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
839; SSE41-NEXT:    movdqa %xmm2, %xmm6
840; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
841; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
842; SSE41-NEXT:    pand %xmm4, %xmm0
843; SSE41-NEXT:    por %xmm6, %xmm0
844; SSE41-NEXT:    movapd %xmm1, %xmm4
845; SSE41-NEXT:    blendvpd %xmm0, %xmm10, %xmm4
846; SSE41-NEXT:    movdqa %xmm9, %xmm0
847; SSE41-NEXT:    pxor %xmm3, %xmm0
848; SSE41-NEXT:    movdqa %xmm2, %xmm6
849; SSE41-NEXT:    pcmpeqd %xmm0, %xmm6
850; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
851; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
852; SSE41-NEXT:    pand %xmm6, %xmm0
853; SSE41-NEXT:    por %xmm2, %xmm0
854; SSE41-NEXT:    blendvpd %xmm0, %xmm9, %xmm1
855; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968]
856; SSE41-NEXT:    movapd %xmm1, %xmm7
857; SSE41-NEXT:    xorpd %xmm3, %xmm7
858; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744069414584320,18446744069414584320]
859; SSE41-NEXT:    movapd %xmm7, %xmm5
860; SSE41-NEXT:    pcmpeqd %xmm6, %xmm5
861; SSE41-NEXT:    pcmpgtd %xmm6, %xmm7
862; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
863; SSE41-NEXT:    pand %xmm5, %xmm0
864; SSE41-NEXT:    por %xmm7, %xmm0
865; SSE41-NEXT:    movapd %xmm2, %xmm5
866; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm5
867; SSE41-NEXT:    movapd %xmm4, %xmm1
868; SSE41-NEXT:    xorpd %xmm3, %xmm1
869; SSE41-NEXT:    movapd %xmm1, %xmm7
870; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
871; SSE41-NEXT:    pcmpgtd %xmm6, %xmm1
872; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
873; SSE41-NEXT:    pand %xmm7, %xmm0
874; SSE41-NEXT:    por %xmm1, %xmm0
875; SSE41-NEXT:    movapd %xmm2, %xmm1
876; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm1
877; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm5[0,2]
878; SSE41-NEXT:    movapd %xmm11, %xmm4
879; SSE41-NEXT:    xorpd %xmm3, %xmm4
880; SSE41-NEXT:    movapd %xmm4, %xmm5
881; SSE41-NEXT:    pcmpeqd %xmm6, %xmm5
882; SSE41-NEXT:    pcmpgtd %xmm6, %xmm4
883; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
884; SSE41-NEXT:    pand %xmm5, %xmm0
885; SSE41-NEXT:    por %xmm4, %xmm0
886; SSE41-NEXT:    movapd %xmm2, %xmm4
887; SSE41-NEXT:    blendvpd %xmm0, %xmm11, %xmm4
888; SSE41-NEXT:    xorpd %xmm8, %xmm3
889; SSE41-NEXT:    movapd %xmm3, %xmm5
890; SSE41-NEXT:    pcmpeqd %xmm6, %xmm5
891; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
892; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
893; SSE41-NEXT:    pand %xmm5, %xmm0
894; SSE41-NEXT:    por %xmm3, %xmm0
895; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm2
896; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm4[0,2]
897; SSE41-NEXT:    movaps %xmm2, %xmm0
898; SSE41-NEXT:    retq
899;
900; AVX1-LABEL: trunc_ssat_v8i64_v8i32:
901; AVX1:       # %bb.0:
902; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
903; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
904; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm2
905; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm3
906; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [2147483647,2147483647]
907; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm8
908; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm6
909; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm7
910; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm4, %xmm5
911; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
912; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [18446744071562067968,18446744071562067968]
913; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm0, %xmm9
914; AVX1-NEXT:    vblendvpd %xmm7, %xmm2, %xmm4, %xmm2
915; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm2, %xmm7
916; AVX1-NEXT:    vblendvpd %xmm6, %xmm1, %xmm4, %xmm1
917; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm1, %xmm6
918; AVX1-NEXT:    vblendvpd %xmm8, %xmm3, %xmm4, %xmm3
919; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm3, %xmm4
920; AVX1-NEXT:    vblendvpd %xmm4, %xmm3, %xmm5, %xmm3
921; AVX1-NEXT:    vblendvpd %xmm6, %xmm1, %xmm5, %xmm1
922; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
923; AVX1-NEXT:    vblendvpd %xmm7, %xmm2, %xmm5, %xmm2
924; AVX1-NEXT:    vblendvpd %xmm9, %xmm0, %xmm5, %xmm0
925; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
926; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
927; AVX1-NEXT:    retq
928;
929; AVX2-SLOW-LABEL: trunc_ssat_v8i64_v8i32:
930; AVX2-SLOW:       # %bb.0:
931; AVX2-SLOW-NEXT:    vmovdqa (%rdi), %ymm0
932; AVX2-SLOW-NEXT:    vmovdqa 32(%rdi), %ymm1
933; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647]
934; AVX2-SLOW-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
935; AVX2-SLOW-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
936; AVX2-SLOW-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
937; AVX2-SLOW-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
938; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
939; AVX2-SLOW-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
940; AVX2-SLOW-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
941; AVX2-SLOW-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
942; AVX2-SLOW-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
943; AVX2-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
944; AVX2-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
945; AVX2-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
946; AVX2-SLOW-NEXT:    retq
947;
948; AVX2-FAST-LABEL: trunc_ssat_v8i64_v8i32:
949; AVX2-FAST:       # %bb.0:
950; AVX2-FAST-NEXT:    vmovdqa (%rdi), %ymm0
951; AVX2-FAST-NEXT:    vmovdqa 32(%rdi), %ymm1
952; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647]
953; AVX2-FAST-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
954; AVX2-FAST-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
955; AVX2-FAST-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
956; AVX2-FAST-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
957; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
958; AVX2-FAST-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
959; AVX2-FAST-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
960; AVX2-FAST-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
961; AVX2-FAST-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
962; AVX2-FAST-NEXT:    vmovapd {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
963; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm2, %ymm0
964; AVX2-FAST-NEXT:    vpermps %ymm1, %ymm2, %ymm1
965; AVX2-FAST-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
966; AVX2-FAST-NEXT:    retq
967;
968; AVX512-LABEL: trunc_ssat_v8i64_v8i32:
969; AVX512:       # %bb.0:
970; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
971; AVX512-NEXT:    vpmovsqd %zmm0, %ymm0
972; AVX512-NEXT:    retq
973;
974; SKX-LABEL: trunc_ssat_v8i64_v8i32:
975; SKX:       # %bb.0:
976; SKX-NEXT:    vmovdqa (%rdi), %ymm0
977; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
978; SKX-NEXT:    vpmovsqd %ymm0, %xmm0
979; SKX-NEXT:    vpmovsqd %ymm1, %xmm1
980; SKX-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
981; SKX-NEXT:    retq
982  %a0 = load <8 x i64>, <8 x i64>* %p0
983  %1 = icmp slt <8 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
984  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
985  %3 = icmp sgt <8 x i64> %2, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
986  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
987  %5 = trunc <8 x i64> %4 to <8 x i32>
988  ret <8 x i32> %5
989}
990
991;
992; Signed saturation truncation to vXi16
993;
994
995define <2 x i16> @trunc_ssat_v2i64_v2i16(<2 x i64> %a0) {
996; SSE2-LABEL: trunc_ssat_v2i64_v2i16:
997; SSE2:       # %bb.0:
998; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
999; SSE2-NEXT:    movdqa %xmm0, %xmm2
1000; SSE2-NEXT:    pxor %xmm1, %xmm2
1001; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147516415,2147516415]
1002; SSE2-NEXT:    movdqa %xmm3, %xmm4
1003; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
1004; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1005; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
1006; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1007; SSE2-NEXT:    pand %xmm5, %xmm2
1008; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1009; SSE2-NEXT:    por %xmm2, %xmm3
1010; SSE2-NEXT:    pand %xmm3, %xmm0
1011; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm3
1012; SSE2-NEXT:    por %xmm0, %xmm3
1013; SSE2-NEXT:    pxor %xmm3, %xmm1
1014; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200]
1015; SSE2-NEXT:    movdqa %xmm1, %xmm2
1016; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
1017; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1018; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
1019; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1020; SSE2-NEXT:    pand %xmm4, %xmm0
1021; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1022; SSE2-NEXT:    por %xmm0, %xmm1
1023; SSE2-NEXT:    pand %xmm1, %xmm3
1024; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm1
1025; SSE2-NEXT:    por %xmm3, %xmm1
1026; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1027; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1028; SSE2-NEXT:    retq
1029;
1030; SSSE3-LABEL: trunc_ssat_v2i64_v2i16:
1031; SSSE3:       # %bb.0:
1032; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
1033; SSSE3-NEXT:    movdqa %xmm0, %xmm2
1034; SSSE3-NEXT:    pxor %xmm1, %xmm2
1035; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2147516415,2147516415]
1036; SSSE3-NEXT:    movdqa %xmm3, %xmm4
1037; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm4
1038; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1039; SSSE3-NEXT:    pcmpeqd %xmm3, %xmm2
1040; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1041; SSSE3-NEXT:    pand %xmm5, %xmm2
1042; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1043; SSSE3-NEXT:    por %xmm2, %xmm3
1044; SSSE3-NEXT:    pand %xmm3, %xmm0
1045; SSSE3-NEXT:    pandn {{.*}}(%rip), %xmm3
1046; SSSE3-NEXT:    por %xmm0, %xmm3
1047; SSSE3-NEXT:    pxor %xmm3, %xmm1
1048; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200]
1049; SSSE3-NEXT:    movdqa %xmm1, %xmm2
1050; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
1051; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1052; SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
1053; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1054; SSSE3-NEXT:    pand %xmm4, %xmm0
1055; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1056; SSSE3-NEXT:    por %xmm0, %xmm1
1057; SSSE3-NEXT:    pand %xmm1, %xmm3
1058; SSSE3-NEXT:    pandn {{.*}}(%rip), %xmm1
1059; SSSE3-NEXT:    por %xmm3, %xmm1
1060; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1061; SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1062; SSSE3-NEXT:    retq
1063;
1064; SSE41-LABEL: trunc_ssat_v2i64_v2i16:
1065; SSE41:       # %bb.0:
1066; SSE41-NEXT:    movdqa %xmm0, %xmm1
1067; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [32767,32767]
1068; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
1069; SSE41-NEXT:    pxor %xmm3, %xmm0
1070; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [2147516415,2147516415]
1071; SSE41-NEXT:    movdqa %xmm4, %xmm5
1072; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
1073; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
1074; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
1075; SSE41-NEXT:    pand %xmm5, %xmm0
1076; SSE41-NEXT:    por %xmm4, %xmm0
1077; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
1078; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1079; SSE41-NEXT:    pxor %xmm2, %xmm3
1080; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200]
1081; SSE41-NEXT:    movdqa %xmm3, %xmm4
1082; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
1083; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
1084; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1085; SSE41-NEXT:    pand %xmm4, %xmm0
1086; SSE41-NEXT:    por %xmm3, %xmm0
1087; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
1088; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1089; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1090; SSE41-NEXT:    retq
1091;
1092; AVX1-LABEL: trunc_ssat_v2i64_v2i16:
1093; AVX1:       # %bb.0:
1094; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767]
1095; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1096; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1097; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1098; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
1099; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1100; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1101; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1102; AVX1-NEXT:    retq
1103;
1104; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16:
1105; AVX2-SLOW:       # %bb.0:
1106; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767]
1107; AVX2-SLOW-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1108; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1109; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1110; AVX2-SLOW-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
1111; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1112; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1113; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1114; AVX2-SLOW-NEXT:    retq
1115;
1116; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16:
1117; AVX2-FAST:       # %bb.0:
1118; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767]
1119; AVX2-FAST-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1120; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1121; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1122; AVX2-FAST-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
1123; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1124; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
1125; AVX2-FAST-NEXT:    retq
1126;
1127; AVX512F-LABEL: trunc_ssat_v2i64_v2i16:
1128; AVX512F:       # %bb.0:
1129; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1130; AVX512F-NEXT:    vpmovsqw %zmm0, %xmm0
1131; AVX512F-NEXT:    vzeroupper
1132; AVX512F-NEXT:    retq
1133;
1134; AVX512VL-LABEL: trunc_ssat_v2i64_v2i16:
1135; AVX512VL:       # %bb.0:
1136; AVX512VL-NEXT:    vpmovsqw %xmm0, %xmm0
1137; AVX512VL-NEXT:    retq
1138;
1139; AVX512BW-LABEL: trunc_ssat_v2i64_v2i16:
1140; AVX512BW:       # %bb.0:
1141; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1142; AVX512BW-NEXT:    vpmovsqw %zmm0, %xmm0
1143; AVX512BW-NEXT:    vzeroupper
1144; AVX512BW-NEXT:    retq
1145;
1146; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i16:
1147; AVX512BWVL:       # %bb.0:
1148; AVX512BWVL-NEXT:    vpmovsqw %xmm0, %xmm0
1149; AVX512BWVL-NEXT:    retq
1150;
1151; SKX-LABEL: trunc_ssat_v2i64_v2i16:
1152; SKX:       # %bb.0:
1153; SKX-NEXT:    vpmovsqw %xmm0, %xmm0
1154; SKX-NEXT:    retq
1155  %1 = icmp slt <2 x i64> %a0, <i64 32767, i64 32767>
1156  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 32767, i64 32767>
1157  %3 = icmp sgt <2 x i64> %2, <i64 -32768, i64 -32768>
1158  %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -32768, i64 -32768>
1159  %5 = trunc <2 x i64> %4 to <2 x i16>
1160  ret <2 x i16> %5
1161}
1162
1163define void @trunc_ssat_v2i64_v2i16_store(<2 x i64> %a0, <2 x i16> *%p1) {
1164; SSE2-LABEL: trunc_ssat_v2i64_v2i16_store:
1165; SSE2:       # %bb.0:
1166; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
1167; SSE2-NEXT:    movdqa %xmm0, %xmm2
1168; SSE2-NEXT:    pxor %xmm1, %xmm2
1169; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147516415,2147516415]
1170; SSE2-NEXT:    movdqa %xmm3, %xmm4
1171; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
1172; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1173; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
1174; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1175; SSE2-NEXT:    pand %xmm5, %xmm2
1176; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1177; SSE2-NEXT:    por %xmm2, %xmm3
1178; SSE2-NEXT:    pand %xmm3, %xmm0
1179; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm3
1180; SSE2-NEXT:    por %xmm0, %xmm3
1181; SSE2-NEXT:    pxor %xmm3, %xmm1
1182; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200]
1183; SSE2-NEXT:    movdqa %xmm1, %xmm2
1184; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
1185; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1186; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
1187; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1188; SSE2-NEXT:    pand %xmm4, %xmm0
1189; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1190; SSE2-NEXT:    por %xmm0, %xmm1
1191; SSE2-NEXT:    pand %xmm1, %xmm3
1192; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm1
1193; SSE2-NEXT:    por %xmm3, %xmm1
1194; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1195; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1196; SSE2-NEXT:    movd %xmm0, (%rdi)
1197; SSE2-NEXT:    retq
1198;
1199; SSSE3-LABEL: trunc_ssat_v2i64_v2i16_store:
1200; SSSE3:       # %bb.0:
1201; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
1202; SSSE3-NEXT:    movdqa %xmm0, %xmm2
1203; SSSE3-NEXT:    pxor %xmm1, %xmm2
1204; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2147516415,2147516415]
1205; SSSE3-NEXT:    movdqa %xmm3, %xmm4
1206; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm4
1207; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1208; SSSE3-NEXT:    pcmpeqd %xmm3, %xmm2
1209; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1210; SSSE3-NEXT:    pand %xmm5, %xmm2
1211; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1212; SSSE3-NEXT:    por %xmm2, %xmm3
1213; SSSE3-NEXT:    pand %xmm3, %xmm0
1214; SSSE3-NEXT:    pandn {{.*}}(%rip), %xmm3
1215; SSSE3-NEXT:    por %xmm0, %xmm3
1216; SSSE3-NEXT:    pxor %xmm3, %xmm1
1217; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200]
1218; SSSE3-NEXT:    movdqa %xmm1, %xmm2
1219; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
1220; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1221; SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
1222; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1223; SSSE3-NEXT:    pand %xmm4, %xmm0
1224; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1225; SSSE3-NEXT:    por %xmm0, %xmm1
1226; SSSE3-NEXT:    pand %xmm1, %xmm3
1227; SSSE3-NEXT:    pandn {{.*}}(%rip), %xmm1
1228; SSSE3-NEXT:    por %xmm3, %xmm1
1229; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1230; SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1231; SSSE3-NEXT:    movd %xmm0, (%rdi)
1232; SSSE3-NEXT:    retq
1233;
1234; SSE41-LABEL: trunc_ssat_v2i64_v2i16_store:
1235; SSE41:       # %bb.0:
1236; SSE41-NEXT:    movdqa %xmm0, %xmm1
1237; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [32767,32767]
1238; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
1239; SSE41-NEXT:    pxor %xmm3, %xmm0
1240; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [2147516415,2147516415]
1241; SSE41-NEXT:    movdqa %xmm4, %xmm5
1242; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
1243; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
1244; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
1245; SSE41-NEXT:    pand %xmm5, %xmm0
1246; SSE41-NEXT:    por %xmm4, %xmm0
1247; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
1248; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1249; SSE41-NEXT:    pxor %xmm2, %xmm3
1250; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200]
1251; SSE41-NEXT:    movdqa %xmm3, %xmm4
1252; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
1253; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
1254; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1255; SSE41-NEXT:    pand %xmm4, %xmm0
1256; SSE41-NEXT:    por %xmm3, %xmm0
1257; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
1258; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1259; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1260; SSE41-NEXT:    movd %xmm0, (%rdi)
1261; SSE41-NEXT:    retq
1262;
1263; AVX1-LABEL: trunc_ssat_v2i64_v2i16_store:
1264; AVX1:       # %bb.0:
1265; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767]
1266; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1267; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1268; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1269; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
1270; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1271; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1272; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1273; AVX1-NEXT:    vmovd %xmm0, (%rdi)
1274; AVX1-NEXT:    retq
1275;
1276; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16_store:
1277; AVX2-SLOW:       # %bb.0:
1278; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767]
1279; AVX2-SLOW-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1280; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1281; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1282; AVX2-SLOW-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
1283; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1284; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1285; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1286; AVX2-SLOW-NEXT:    vmovd %xmm0, (%rdi)
1287; AVX2-SLOW-NEXT:    retq
1288;
1289; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16_store:
1290; AVX2-FAST:       # %bb.0:
1291; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767]
1292; AVX2-FAST-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1293; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1294; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1295; AVX2-FAST-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
1296; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1297; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,u,u,u,u,u,u,u,u,u,u,u,u]
1298; AVX2-FAST-NEXT:    vmovd %xmm0, (%rdi)
1299; AVX2-FAST-NEXT:    retq
1300;
1301; AVX512F-LABEL: trunc_ssat_v2i64_v2i16_store:
1302; AVX512F:       # %bb.0:
1303; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1304; AVX512F-NEXT:    vpmovsqw %zmm0, %xmm0
1305; AVX512F-NEXT:    vmovd %xmm0, (%rdi)
1306; AVX512F-NEXT:    vzeroupper
1307; AVX512F-NEXT:    retq
1308;
1309; AVX512VL-LABEL: trunc_ssat_v2i64_v2i16_store:
1310; AVX512VL:       # %bb.0:
1311; AVX512VL-NEXT:    vpmovsqw %xmm0, (%rdi)
1312; AVX512VL-NEXT:    retq
1313;
1314; AVX512BW-LABEL: trunc_ssat_v2i64_v2i16_store:
1315; AVX512BW:       # %bb.0:
1316; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1317; AVX512BW-NEXT:    vpmovsqw %zmm0, %xmm0
1318; AVX512BW-NEXT:    vmovd %xmm0, (%rdi)
1319; AVX512BW-NEXT:    vzeroupper
1320; AVX512BW-NEXT:    retq
1321;
1322; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i16_store:
1323; AVX512BWVL:       # %bb.0:
1324; AVX512BWVL-NEXT:    vpmovsqw %xmm0, (%rdi)
1325; AVX512BWVL-NEXT:    retq
1326;
1327; SKX-LABEL: trunc_ssat_v2i64_v2i16_store:
1328; SKX:       # %bb.0:
1329; SKX-NEXT:    vpmovsqw %xmm0, (%rdi)
1330; SKX-NEXT:    retq
1331  %1 = icmp slt <2 x i64> %a0, <i64 32767, i64 32767>
1332  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 32767, i64 32767>
1333  %3 = icmp sgt <2 x i64> %2, <i64 -32768, i64 -32768>
1334  %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -32768, i64 -32768>
1335  %5 = trunc <2 x i64> %4 to <2 x i16>
1336  store <2 x i16> %5, <2 x i16> *%p1
1337  ret void
1338}
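
; For reference, a minimal scalar C sketch of the clamp-then-truncate pattern that the
; v2i64 -> v2i16 tests above exercise (illustrative only; the helper names and the plain
; output pointer are made up here and are not part of the test):
;
;   static inline int16_t ssat_i64_to_i16(int64_t x) {
;     if (x > 32767)  x = 32767;     /* icmp slt + select: min(x, INT16_MAX) */
;     if (x < -32768) x = -32768;    /* icmp sgt + select: max(x, INT16_MIN) */
;     return (int16_t)x;             /* trunc is lossless after the clamp    */
;   }
;
;   static inline void ssat_i64_to_i16_store(int64_t x, int16_t *p) {
;     *p = ssat_i64_to_i16(x);       /* mirrors the *_store variant above    */
;   }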
1339
1340define <4 x i16> @trunc_ssat_v4i64_v4i16(<4 x i64> %a0) {
1341; SSE2-LABEL: trunc_ssat_v4i64_v4i16:
1342; SSE2:       # %bb.0:
1343; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [32767,32767]
1344; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
1345; SSE2-NEXT:    movdqa %xmm0, %xmm3
1346; SSE2-NEXT:    pxor %xmm2, %xmm3
1347; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2147516415,2147516415]
1348; SSE2-NEXT:    movdqa %xmm5, %xmm6
1349; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
1350; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1351; SSE2-NEXT:    pcmpeqd %xmm5, %xmm3
1352; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1353; SSE2-NEXT:    pand %xmm7, %xmm4
1354; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
1355; SSE2-NEXT:    por %xmm4, %xmm3
1356; SSE2-NEXT:    pand %xmm3, %xmm0
1357; SSE2-NEXT:    pandn %xmm8, %xmm3
1358; SSE2-NEXT:    por %xmm0, %xmm3
1359; SSE2-NEXT:    movdqa %xmm1, %xmm0
1360; SSE2-NEXT:    pxor %xmm2, %xmm0
1361; SSE2-NEXT:    movdqa %xmm5, %xmm4
1362; SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
1363; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1364; SSE2-NEXT:    pcmpeqd %xmm5, %xmm0
1365; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1366; SSE2-NEXT:    pand %xmm6, %xmm0
1367; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1368; SSE2-NEXT:    por %xmm0, %xmm4
1369; SSE2-NEXT:    pand %xmm4, %xmm1
1370; SSE2-NEXT:    pandn %xmm8, %xmm4
1371; SSE2-NEXT:    por %xmm1, %xmm4
1372; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1373; SSE2-NEXT:    movdqa %xmm4, %xmm0
1374; SSE2-NEXT:    pxor %xmm2, %xmm0
1375; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [18446744071562035200,18446744071562035200]
1376; SSE2-NEXT:    movdqa %xmm0, %xmm6
1377; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
1378; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1379; SSE2-NEXT:    pcmpeqd %xmm5, %xmm0
1380; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1381; SSE2-NEXT:    pand %xmm7, %xmm0
1382; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1383; SSE2-NEXT:    por %xmm0, %xmm6
1384; SSE2-NEXT:    pand %xmm6, %xmm4
1385; SSE2-NEXT:    pandn %xmm1, %xmm6
1386; SSE2-NEXT:    por %xmm4, %xmm6
1387; SSE2-NEXT:    pxor %xmm3, %xmm2
1388; SSE2-NEXT:    movdqa %xmm2, %xmm0
1389; SSE2-NEXT:    pcmpgtd %xmm5, %xmm0
1390; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
1391; SSE2-NEXT:    pcmpeqd %xmm5, %xmm2
1392; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1393; SSE2-NEXT:    pand %xmm4, %xmm2
1394; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1395; SSE2-NEXT:    por %xmm2, %xmm0
1396; SSE2-NEXT:    pand %xmm0, %xmm3
1397; SSE2-NEXT:    pandn %xmm1, %xmm0
1398; SSE2-NEXT:    por %xmm3, %xmm0
1399; SSE2-NEXT:    packssdw %xmm6, %xmm0
1400; SSE2-NEXT:    packssdw %xmm0, %xmm0
1401; SSE2-NEXT:    retq
1402;
1403; SSSE3-LABEL: trunc_ssat_v4i64_v4i16:
1404; SSSE3:       # %bb.0:
1405; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [32767,32767]
1406; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
1407; SSSE3-NEXT:    movdqa %xmm0, %xmm3
1408; SSSE3-NEXT:    pxor %xmm2, %xmm3
1409; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [2147516415,2147516415]
1410; SSSE3-NEXT:    movdqa %xmm5, %xmm6
1411; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm6
1412; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1413; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm3
1414; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1415; SSSE3-NEXT:    pand %xmm7, %xmm4
1416; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
1417; SSSE3-NEXT:    por %xmm4, %xmm3
1418; SSSE3-NEXT:    pand %xmm3, %xmm0
1419; SSSE3-NEXT:    pandn %xmm8, %xmm3
1420; SSSE3-NEXT:    por %xmm0, %xmm3
1421; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1422; SSSE3-NEXT:    pxor %xmm2, %xmm0
1423; SSSE3-NEXT:    movdqa %xmm5, %xmm4
1424; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm4
1425; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1426; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm0
1427; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1428; SSSE3-NEXT:    pand %xmm6, %xmm0
1429; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1430; SSSE3-NEXT:    por %xmm0, %xmm4
1431; SSSE3-NEXT:    pand %xmm4, %xmm1
1432; SSSE3-NEXT:    pandn %xmm8, %xmm4
1433; SSSE3-NEXT:    por %xmm1, %xmm4
1434; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1435; SSSE3-NEXT:    movdqa %xmm4, %xmm0
1436; SSSE3-NEXT:    pxor %xmm2, %xmm0
1437; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [18446744071562035200,18446744071562035200]
1438; SSSE3-NEXT:    movdqa %xmm0, %xmm6
1439; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm6
1440; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1441; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm0
1442; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1443; SSSE3-NEXT:    pand %xmm7, %xmm0
1444; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1445; SSSE3-NEXT:    por %xmm0, %xmm6
1446; SSSE3-NEXT:    pand %xmm6, %xmm4
1447; SSSE3-NEXT:    pandn %xmm1, %xmm6
1448; SSSE3-NEXT:    por %xmm4, %xmm6
1449; SSSE3-NEXT:    pxor %xmm3, %xmm2
1450; SSSE3-NEXT:    movdqa %xmm2, %xmm0
1451; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm0
1452; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
1453; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm2
1454; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1455; SSSE3-NEXT:    pand %xmm4, %xmm2
1456; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1457; SSSE3-NEXT:    por %xmm2, %xmm0
1458; SSSE3-NEXT:    pand %xmm0, %xmm3
1459; SSSE3-NEXT:    pandn %xmm1, %xmm0
1460; SSSE3-NEXT:    por %xmm3, %xmm0
1461; SSSE3-NEXT:    packssdw %xmm6, %xmm0
1462; SSSE3-NEXT:    packssdw %xmm0, %xmm0
1463; SSSE3-NEXT:    retq
1464;
1465; SSE41-LABEL: trunc_ssat_v4i64_v4i16:
1466; SSE41:       # %bb.0:
1467; SSE41-NEXT:    movdqa %xmm0, %xmm2
1468; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [32767,32767]
1469; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
1470; SSE41-NEXT:    pxor %xmm3, %xmm0
1471; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [2147516415,2147516415]
1472; SSE41-NEXT:    movdqa %xmm6, %xmm5
1473; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
1474; SSE41-NEXT:    movdqa %xmm6, %xmm7
1475; SSE41-NEXT:    pcmpgtd %xmm0, %xmm7
1476; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
1477; SSE41-NEXT:    pand %xmm5, %xmm0
1478; SSE41-NEXT:    por %xmm7, %xmm0
1479; SSE41-NEXT:    movapd %xmm4, %xmm5
1480; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm5
1481; SSE41-NEXT:    movdqa %xmm1, %xmm0
1482; SSE41-NEXT:    pxor %xmm3, %xmm0
1483; SSE41-NEXT:    movdqa %xmm6, %xmm2
1484; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
1485; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
1486; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
1487; SSE41-NEXT:    pand %xmm2, %xmm0
1488; SSE41-NEXT:    por %xmm6, %xmm0
1489; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm4
1490; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1491; SSE41-NEXT:    movapd %xmm4, %xmm2
1492; SSE41-NEXT:    xorpd %xmm3, %xmm2
1493; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200]
1494; SSE41-NEXT:    movapd %xmm2, %xmm7
1495; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
1496; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
1497; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
1498; SSE41-NEXT:    pand %xmm7, %xmm0
1499; SSE41-NEXT:    por %xmm2, %xmm0
1500; SSE41-NEXT:    movapd %xmm1, %xmm2
1501; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
1502; SSE41-NEXT:    xorpd %xmm5, %xmm3
1503; SSE41-NEXT:    movapd %xmm3, %xmm4
1504; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
1505; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
1506; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1507; SSE41-NEXT:    pand %xmm4, %xmm0
1508; SSE41-NEXT:    por %xmm3, %xmm0
1509; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm1
1510; SSE41-NEXT:    packssdw %xmm2, %xmm1
1511; SSE41-NEXT:    packssdw %xmm1, %xmm1
1512; SSE41-NEXT:    movdqa %xmm1, %xmm0
1513; SSE41-NEXT:    retq
1514;
1515; AVX1-LABEL: trunc_ssat_v4i64_v4i16:
1516; AVX1:       # %bb.0:
1517; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1518; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [32767,32767]
1519; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm3
1520; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm4
1521; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm2, %xmm0
1522; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744073709518848,18446744073709518848]
1523; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
1524; AVX1-NEXT:    vblendvpd %xmm3, %xmm1, %xmm2, %xmm1
1525; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm2
1526; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm4, %xmm1
1527; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
1528; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1529; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1530; AVX1-NEXT:    vzeroupper
1531; AVX1-NEXT:    retq
1532;
1533; AVX2-LABEL: trunc_ssat_v4i64_v4i16:
1534; AVX2:       # %bb.0:
1535; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [32767,32767,32767,32767]
1536; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
1537; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1538; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848]
1539; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
1540; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1541; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
1542; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1543; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1544; AVX2-NEXT:    vzeroupper
1545; AVX2-NEXT:    retq
1546;
1547; AVX512F-LABEL: trunc_ssat_v4i64_v4i16:
1548; AVX512F:       # %bb.0:
1549; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1550; AVX512F-NEXT:    vpmovsqw %zmm0, %xmm0
1551; AVX512F-NEXT:    vzeroupper
1552; AVX512F-NEXT:    retq
1553;
1554; AVX512VL-LABEL: trunc_ssat_v4i64_v4i16:
1555; AVX512VL:       # %bb.0:
1556; AVX512VL-NEXT:    vpmovsqw %ymm0, %xmm0
1557; AVX512VL-NEXT:    vzeroupper
1558; AVX512VL-NEXT:    retq
1559;
1560; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16:
1561; AVX512BW:       # %bb.0:
1562; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1563; AVX512BW-NEXT:    vpmovsqw %zmm0, %xmm0
1564; AVX512BW-NEXT:    vzeroupper
1565; AVX512BW-NEXT:    retq
1566;
1567; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i16:
1568; AVX512BWVL:       # %bb.0:
1569; AVX512BWVL-NEXT:    vpmovsqw %ymm0, %xmm0
1570; AVX512BWVL-NEXT:    vzeroupper
1571; AVX512BWVL-NEXT:    retq
1572;
1573; SKX-LABEL: trunc_ssat_v4i64_v4i16:
1574; SKX:       # %bb.0:
1575; SKX-NEXT:    vpmovsqw %ymm0, %xmm0
1576; SKX-NEXT:    vzeroupper
1577; SKX-NEXT:    retq
1578  %1 = icmp slt <4 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767>
1579  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>
1580  %3 = icmp sgt <4 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768>
1581  %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>
1582  %5 = trunc <4 x i64> %4 to <4 x i16>
1583  ret <4 x i16> %5
1584}
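
; With AVX-512VL the whole clamp+truncate above collapses into a single vpmovsqw, as the
; AVX512VL/AVX512BWVL/SKX checks show. A hedged C usage sketch, assuming the usual Intel
; intrinsic name for that instruction:
;
;   __m128i r = _mm256_cvtsepi64_epi16(v);   /* <4 x i64> -> 4 saturated i16 in the low 64 bits */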
1585
1586define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) {
1587; SSE2-LABEL: trunc_ssat_v4i64_v4i16_store:
1588; SSE2:       # %bb.0:
1589; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [32767,32767]
1590; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
1591; SSE2-NEXT:    movdqa %xmm0, %xmm3
1592; SSE2-NEXT:    pxor %xmm2, %xmm3
1593; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2147516415,2147516415]
1594; SSE2-NEXT:    movdqa %xmm5, %xmm6
1595; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
1596; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1597; SSE2-NEXT:    pcmpeqd %xmm5, %xmm3
1598; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1599; SSE2-NEXT:    pand %xmm7, %xmm4
1600; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
1601; SSE2-NEXT:    por %xmm4, %xmm3
1602; SSE2-NEXT:    pand %xmm3, %xmm0
1603; SSE2-NEXT:    pandn %xmm8, %xmm3
1604; SSE2-NEXT:    por %xmm0, %xmm3
1605; SSE2-NEXT:    movdqa %xmm1, %xmm0
1606; SSE2-NEXT:    pxor %xmm2, %xmm0
1607; SSE2-NEXT:    movdqa %xmm5, %xmm4
1608; SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
1609; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1610; SSE2-NEXT:    pcmpeqd %xmm5, %xmm0
1611; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1612; SSE2-NEXT:    pand %xmm6, %xmm0
1613; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1614; SSE2-NEXT:    por %xmm0, %xmm4
1615; SSE2-NEXT:    pand %xmm4, %xmm1
1616; SSE2-NEXT:    pandn %xmm8, %xmm4
1617; SSE2-NEXT:    por %xmm1, %xmm4
1618; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [18446744073709518848,18446744073709518848]
1619; SSE2-NEXT:    movdqa %xmm4, %xmm1
1620; SSE2-NEXT:    pxor %xmm2, %xmm1
1621; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [18446744071562035200,18446744071562035200]
1622; SSE2-NEXT:    movdqa %xmm1, %xmm6
1623; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
1624; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1625; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
1626; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1627; SSE2-NEXT:    pand %xmm7, %xmm1
1628; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1629; SSE2-NEXT:    por %xmm1, %xmm6
1630; SSE2-NEXT:    pand %xmm6, %xmm4
1631; SSE2-NEXT:    pandn %xmm0, %xmm6
1632; SSE2-NEXT:    por %xmm4, %xmm6
1633; SSE2-NEXT:    pxor %xmm3, %xmm2
1634; SSE2-NEXT:    movdqa %xmm2, %xmm1
1635; SSE2-NEXT:    pcmpgtd %xmm5, %xmm1
1636; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
1637; SSE2-NEXT:    pcmpeqd %xmm5, %xmm2
1638; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1639; SSE2-NEXT:    pand %xmm4, %xmm2
1640; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1641; SSE2-NEXT:    por %xmm2, %xmm1
1642; SSE2-NEXT:    pand %xmm1, %xmm3
1643; SSE2-NEXT:    pandn %xmm0, %xmm1
1644; SSE2-NEXT:    por %xmm3, %xmm1
1645; SSE2-NEXT:    packssdw %xmm6, %xmm1
1646; SSE2-NEXT:    packssdw %xmm1, %xmm1
1647; SSE2-NEXT:    movq %xmm1, (%rdi)
1648; SSE2-NEXT:    retq
1649;
1650; SSSE3-LABEL: trunc_ssat_v4i64_v4i16_store:
1651; SSSE3:       # %bb.0:
1652; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [32767,32767]
1653; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
1654; SSSE3-NEXT:    movdqa %xmm0, %xmm3
1655; SSSE3-NEXT:    pxor %xmm2, %xmm3
1656; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [2147516415,2147516415]
1657; SSSE3-NEXT:    movdqa %xmm5, %xmm6
1658; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm6
1659; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1660; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm3
1661; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1662; SSSE3-NEXT:    pand %xmm7, %xmm4
1663; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
1664; SSSE3-NEXT:    por %xmm4, %xmm3
1665; SSSE3-NEXT:    pand %xmm3, %xmm0
1666; SSSE3-NEXT:    pandn %xmm8, %xmm3
1667; SSSE3-NEXT:    por %xmm0, %xmm3
1668; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1669; SSSE3-NEXT:    pxor %xmm2, %xmm0
1670; SSSE3-NEXT:    movdqa %xmm5, %xmm4
1671; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm4
1672; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1673; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm0
1674; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1675; SSSE3-NEXT:    pand %xmm6, %xmm0
1676; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1677; SSSE3-NEXT:    por %xmm0, %xmm4
1678; SSSE3-NEXT:    pand %xmm4, %xmm1
1679; SSSE3-NEXT:    pandn %xmm8, %xmm4
1680; SSSE3-NEXT:    por %xmm1, %xmm4
1681; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [18446744073709518848,18446744073709518848]
1682; SSSE3-NEXT:    movdqa %xmm4, %xmm1
1683; SSSE3-NEXT:    pxor %xmm2, %xmm1
1684; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [18446744071562035200,18446744071562035200]
1685; SSSE3-NEXT:    movdqa %xmm1, %xmm6
1686; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm6
1687; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1688; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm1
1689; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1690; SSSE3-NEXT:    pand %xmm7, %xmm1
1691; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1692; SSSE3-NEXT:    por %xmm1, %xmm6
1693; SSSE3-NEXT:    pand %xmm6, %xmm4
1694; SSSE3-NEXT:    pandn %xmm0, %xmm6
1695; SSSE3-NEXT:    por %xmm4, %xmm6
1696; SSSE3-NEXT:    pxor %xmm3, %xmm2
1697; SSSE3-NEXT:    movdqa %xmm2, %xmm1
1698; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm1
1699; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
1700; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm2
1701; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1702; SSSE3-NEXT:    pand %xmm4, %xmm2
1703; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1704; SSSE3-NEXT:    por %xmm2, %xmm1
1705; SSSE3-NEXT:    pand %xmm1, %xmm3
1706; SSSE3-NEXT:    pandn %xmm0, %xmm1
1707; SSSE3-NEXT:    por %xmm3, %xmm1
1708; SSSE3-NEXT:    packssdw %xmm6, %xmm1
1709; SSSE3-NEXT:    packssdw %xmm1, %xmm1
1710; SSSE3-NEXT:    movq %xmm1, (%rdi)
1711; SSSE3-NEXT:    retq
1712;
1713; SSE41-LABEL: trunc_ssat_v4i64_v4i16_store:
1714; SSE41:       # %bb.0:
1715; SSE41-NEXT:    movdqa %xmm0, %xmm2
1716; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [32767,32767]
1717; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
1718; SSE41-NEXT:    pxor %xmm3, %xmm0
1719; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [2147516415,2147516415]
1720; SSE41-NEXT:    movdqa %xmm6, %xmm5
1721; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
1722; SSE41-NEXT:    movdqa %xmm6, %xmm7
1723; SSE41-NEXT:    pcmpgtd %xmm0, %xmm7
1724; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
1725; SSE41-NEXT:    pand %xmm5, %xmm0
1726; SSE41-NEXT:    por %xmm7, %xmm0
1727; SSE41-NEXT:    movapd %xmm4, %xmm5
1728; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm5
1729; SSE41-NEXT:    movdqa %xmm1, %xmm0
1730; SSE41-NEXT:    pxor %xmm3, %xmm0
1731; SSE41-NEXT:    movdqa %xmm6, %xmm2
1732; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
1733; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
1734; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
1735; SSE41-NEXT:    pand %xmm2, %xmm0
1736; SSE41-NEXT:    por %xmm6, %xmm0
1737; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm4
1738; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1739; SSE41-NEXT:    movapd %xmm4, %xmm2
1740; SSE41-NEXT:    xorpd %xmm3, %xmm2
1741; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200]
1742; SSE41-NEXT:    movapd %xmm2, %xmm7
1743; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
1744; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
1745; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
1746; SSE41-NEXT:    pand %xmm7, %xmm0
1747; SSE41-NEXT:    por %xmm2, %xmm0
1748; SSE41-NEXT:    movapd %xmm1, %xmm2
1749; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
1750; SSE41-NEXT:    xorpd %xmm5, %xmm3
1751; SSE41-NEXT:    movapd %xmm3, %xmm4
1752; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
1753; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
1754; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1755; SSE41-NEXT:    pand %xmm4, %xmm0
1756; SSE41-NEXT:    por %xmm3, %xmm0
1757; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm1
1758; SSE41-NEXT:    packssdw %xmm2, %xmm1
1759; SSE41-NEXT:    packssdw %xmm1, %xmm1
1760; SSE41-NEXT:    movq %xmm1, (%rdi)
1761; SSE41-NEXT:    retq
1762;
1763; AVX1-LABEL: trunc_ssat_v4i64_v4i16_store:
1764; AVX1:       # %bb.0:
1765; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1766; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [32767,32767]
1767; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm3
1768; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm4
1769; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm2, %xmm0
1770; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744073709518848,18446744073709518848]
1771; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
1772; AVX1-NEXT:    vblendvpd %xmm3, %xmm1, %xmm2, %xmm1
1773; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm2
1774; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm4, %xmm1
1775; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
1776; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1777; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1778; AVX1-NEXT:    vmovq %xmm0, (%rdi)
1779; AVX1-NEXT:    vzeroupper
1780; AVX1-NEXT:    retq
1781;
1782; AVX2-LABEL: trunc_ssat_v4i64_v4i16_store:
1783; AVX2:       # %bb.0:
1784; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [32767,32767,32767,32767]
1785; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
1786; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1787; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848]
1788; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
1789; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1790; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
1791; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1792; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1793; AVX2-NEXT:    vmovq %xmm0, (%rdi)
1794; AVX2-NEXT:    vzeroupper
1795; AVX2-NEXT:    retq
1796;
1797; AVX512F-LABEL: trunc_ssat_v4i64_v4i16_store:
1798; AVX512F:       # %bb.0:
1799; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1800; AVX512F-NEXT:    vpmovsqw %zmm0, %xmm0
1801; AVX512F-NEXT:    vmovq %xmm0, (%rdi)
1802; AVX512F-NEXT:    vzeroupper
1803; AVX512F-NEXT:    retq
1804;
1805; AVX512VL-LABEL: trunc_ssat_v4i64_v4i16_store:
1806; AVX512VL:       # %bb.0:
1807; AVX512VL-NEXT:    vpmovsqw %ymm0, (%rdi)
1808; AVX512VL-NEXT:    vzeroupper
1809; AVX512VL-NEXT:    retq
1810;
1811; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16_store:
1812; AVX512BW:       # %bb.0:
1813; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1814; AVX512BW-NEXT:    vpmovsqw %zmm0, %xmm0
1815; AVX512BW-NEXT:    vmovq %xmm0, (%rdi)
1816; AVX512BW-NEXT:    vzeroupper
1817; AVX512BW-NEXT:    retq
1818;
1819; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i16_store:
1820; AVX512BWVL:       # %bb.0:
1821; AVX512BWVL-NEXT:    vpmovsqw %ymm0, (%rdi)
1822; AVX512BWVL-NEXT:    vzeroupper
1823; AVX512BWVL-NEXT:    retq
1824;
1825; SKX-LABEL: trunc_ssat_v4i64_v4i16_store:
1826; SKX:       # %bb.0:
1827; SKX-NEXT:    vpmovsqw %ymm0, (%rdi)
1828; SKX-NEXT:    vzeroupper
1829; SKX-NEXT:    retq
1830  %1 = icmp slt <4 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767>
1831  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>
1832  %3 = icmp sgt <4 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768>
1833  %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>
1834  %5 = trunc <4 x i64> %4 to <4 x i16>
1835  store <4 x i16> %5, <4 x i16> *%p1
1836  ret void
1837}
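
; The AVX2 checks above use a clamp-with-blends-then-pack shape. A rough, hand-written C
; intrinsics sketch of that shape (an approximation of the checked sequence, not compiler
; output; the helper name is made up for illustration):
;
;   #include <immintrin.h>
;   static void ssat_v4i64_to_v4i16_store(__m256i v, int16_t *p) {
;     const __m256i vmax = _mm256_set1_epi64x(32767);
;     const __m256i vmin = _mm256_set1_epi64x(-32768);
;     __m256i lt = _mm256_cmpgt_epi64(vmax, v);              /* v < 32767 ?     */
;     v = _mm256_castpd_si256(_mm256_blendv_pd(              /* min(v, 32767)   */
;           _mm256_castsi256_pd(vmax), _mm256_castsi256_pd(v), _mm256_castsi256_pd(lt)));
;     __m256i gt = _mm256_cmpgt_epi64(v, vmin);              /* v > -32768 ?    */
;     v = _mm256_castpd_si256(_mm256_blendv_pd(              /* max(v, -32768)  */
;           _mm256_castsi256_pd(vmin), _mm256_castsi256_pd(v), _mm256_castsi256_pd(gt)));
;     __m128i lo = _mm256_castsi256_si128(v);
;     __m128i hi = _mm256_extracti128_si256(v, 1);
;     __m128i w  = _mm_packs_epi32(lo, hi);                  /* vpackssdw       */
;     w = _mm_packs_epi32(w, w);                             /* vpackssdw again */
;     _mm_storel_epi64((__m128i *)p, w);                     /* vmovq to memory */
;   }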
1838
1839define <8 x i16> @trunc_ssat_v8i64_v8i16(<8 x i64>* %p0) "min-legal-vector-width"="256" {
1840; SSE2-LABEL: trunc_ssat_v8i64_v8i16:
1841; SSE2:       # %bb.0:
1842; SSE2-NEXT:    movdqa (%rdi), %xmm6
1843; SSE2-NEXT:    movdqa 16(%rdi), %xmm9
1844; SSE2-NEXT:    movdqa 32(%rdi), %xmm3
1845; SSE2-NEXT:    movdqa 48(%rdi), %xmm5
1846; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [32767,32767]
1847; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
1848; SSE2-NEXT:    movdqa %xmm3, %xmm2
1849; SSE2-NEXT:    pxor %xmm1, %xmm2
1850; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [2147516415,2147516415]
1851; SSE2-NEXT:    movdqa %xmm10, %xmm7
1852; SSE2-NEXT:    pcmpgtd %xmm2, %xmm7
1853; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
1854; SSE2-NEXT:    pcmpeqd %xmm10, %xmm2
1855; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1856; SSE2-NEXT:    pand %xmm0, %xmm4
1857; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
1858; SSE2-NEXT:    por %xmm4, %xmm2
1859; SSE2-NEXT:    pand %xmm2, %xmm3
1860; SSE2-NEXT:    pandn %xmm8, %xmm2
1861; SSE2-NEXT:    por %xmm3, %xmm2
1862; SSE2-NEXT:    movdqa %xmm5, %xmm0
1863; SSE2-NEXT:    pxor %xmm1, %xmm0
1864; SSE2-NEXT:    movdqa %xmm10, %xmm3
1865; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
1866; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
1867; SSE2-NEXT:    pcmpeqd %xmm10, %xmm0
1868; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1869; SSE2-NEXT:    pand %xmm4, %xmm0
1870; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1871; SSE2-NEXT:    por %xmm0, %xmm3
1872; SSE2-NEXT:    pand %xmm3, %xmm5
1873; SSE2-NEXT:    pandn %xmm8, %xmm3
1874; SSE2-NEXT:    por %xmm5, %xmm3
1875; SSE2-NEXT:    movdqa %xmm6, %xmm0
1876; SSE2-NEXT:    pxor %xmm1, %xmm0
1877; SSE2-NEXT:    movdqa %xmm10, %xmm4
1878; SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
1879; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1880; SSE2-NEXT:    pcmpeqd %xmm10, %xmm0
1881; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1882; SSE2-NEXT:    pand %xmm5, %xmm0
1883; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
1884; SSE2-NEXT:    por %xmm0, %xmm5
1885; SSE2-NEXT:    pand %xmm5, %xmm6
1886; SSE2-NEXT:    pandn %xmm8, %xmm5
1887; SSE2-NEXT:    por %xmm6, %xmm5
1888; SSE2-NEXT:    movdqa %xmm9, %xmm0
1889; SSE2-NEXT:    pxor %xmm1, %xmm0
1890; SSE2-NEXT:    movdqa %xmm10, %xmm4
1891; SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
1892; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1893; SSE2-NEXT:    pcmpeqd %xmm10, %xmm0
1894; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1895; SSE2-NEXT:    pand %xmm6, %xmm0
1896; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3]
1897; SSE2-NEXT:    por %xmm0, %xmm7
1898; SSE2-NEXT:    pand %xmm7, %xmm9
1899; SSE2-NEXT:    pandn %xmm8, %xmm7
1900; SSE2-NEXT:    por %xmm9, %xmm7
1901; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709518848,18446744073709518848]
1902; SSE2-NEXT:    movdqa %xmm7, %xmm0
1903; SSE2-NEXT:    pxor %xmm1, %xmm0
1904; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562035200,18446744071562035200]
1905; SSE2-NEXT:    movdqa %xmm0, %xmm4
1906; SSE2-NEXT:    pcmpgtd %xmm9, %xmm4
1907; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1908; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
1909; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1910; SSE2-NEXT:    pand %xmm6, %xmm0
1911; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1912; SSE2-NEXT:    por %xmm0, %xmm4
1913; SSE2-NEXT:    pand %xmm4, %xmm7
1914; SSE2-NEXT:    pandn %xmm8, %xmm4
1915; SSE2-NEXT:    por %xmm7, %xmm4
1916; SSE2-NEXT:    movdqa %xmm5, %xmm0
1917; SSE2-NEXT:    pxor %xmm1, %xmm0
1918; SSE2-NEXT:    movdqa %xmm0, %xmm6
1919; SSE2-NEXT:    pcmpgtd %xmm9, %xmm6
1920; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
1921; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
1922; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1923; SSE2-NEXT:    pand %xmm10, %xmm7
1924; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
1925; SSE2-NEXT:    por %xmm7, %xmm0
1926; SSE2-NEXT:    pand %xmm0, %xmm5
1927; SSE2-NEXT:    pandn %xmm8, %xmm0
1928; SSE2-NEXT:    por %xmm5, %xmm0
1929; SSE2-NEXT:    packssdw %xmm4, %xmm0
1930; SSE2-NEXT:    movdqa %xmm3, %xmm4
1931; SSE2-NEXT:    pxor %xmm1, %xmm4
1932; SSE2-NEXT:    movdqa %xmm4, %xmm5
1933; SSE2-NEXT:    pcmpgtd %xmm9, %xmm5
1934; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
1935; SSE2-NEXT:    pcmpeqd %xmm9, %xmm4
1936; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1937; SSE2-NEXT:    pand %xmm6, %xmm4
1938; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1939; SSE2-NEXT:    por %xmm4, %xmm5
1940; SSE2-NEXT:    pand %xmm5, %xmm3
1941; SSE2-NEXT:    pandn %xmm8, %xmm5
1942; SSE2-NEXT:    por %xmm3, %xmm5
1943; SSE2-NEXT:    pxor %xmm2, %xmm1
1944; SSE2-NEXT:    movdqa %xmm1, %xmm3
1945; SSE2-NEXT:    pcmpgtd %xmm9, %xmm3
1946; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
1947; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
1948; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1949; SSE2-NEXT:    pand %xmm4, %xmm1
1950; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1951; SSE2-NEXT:    por %xmm1, %xmm3
1952; SSE2-NEXT:    pand %xmm3, %xmm2
1953; SSE2-NEXT:    pandn %xmm8, %xmm3
1954; SSE2-NEXT:    por %xmm2, %xmm3
1955; SSE2-NEXT:    packssdw %xmm5, %xmm3
1956; SSE2-NEXT:    packssdw %xmm3, %xmm0
1957; SSE2-NEXT:    retq
1958;
1959; SSSE3-LABEL: trunc_ssat_v8i64_v8i16:
1960; SSSE3:       # %bb.0:
1961; SSSE3-NEXT:    movdqa (%rdi), %xmm6
1962; SSSE3-NEXT:    movdqa 16(%rdi), %xmm9
1963; SSSE3-NEXT:    movdqa 32(%rdi), %xmm3
1964; SSSE3-NEXT:    movdqa 48(%rdi), %xmm5
1965; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [32767,32767]
1966; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
1967; SSSE3-NEXT:    movdqa %xmm3, %xmm2
1968; SSSE3-NEXT:    pxor %xmm1, %xmm2
1969; SSSE3-NEXT:    movdqa {{.*#+}} xmm10 = [2147516415,2147516415]
1970; SSSE3-NEXT:    movdqa %xmm10, %xmm7
1971; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm7
1972; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
1973; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm2
1974; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1975; SSSE3-NEXT:    pand %xmm0, %xmm4
1976; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
1977; SSSE3-NEXT:    por %xmm4, %xmm2
1978; SSSE3-NEXT:    pand %xmm2, %xmm3
1979; SSSE3-NEXT:    pandn %xmm8, %xmm2
1980; SSSE3-NEXT:    por %xmm3, %xmm2
1981; SSSE3-NEXT:    movdqa %xmm5, %xmm0
1982; SSSE3-NEXT:    pxor %xmm1, %xmm0
1983; SSSE3-NEXT:    movdqa %xmm10, %xmm3
1984; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm3
1985; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
1986; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm0
1987; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1988; SSSE3-NEXT:    pand %xmm4, %xmm0
1989; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1990; SSSE3-NEXT:    por %xmm0, %xmm3
1991; SSSE3-NEXT:    pand %xmm3, %xmm5
1992; SSSE3-NEXT:    pandn %xmm8, %xmm3
1993; SSSE3-NEXT:    por %xmm5, %xmm3
1994; SSSE3-NEXT:    movdqa %xmm6, %xmm0
1995; SSSE3-NEXT:    pxor %xmm1, %xmm0
1996; SSSE3-NEXT:    movdqa %xmm10, %xmm4
1997; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm4
1998; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1999; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm0
2000; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2001; SSSE3-NEXT:    pand %xmm5, %xmm0
2002; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
2003; SSSE3-NEXT:    por %xmm0, %xmm5
2004; SSSE3-NEXT:    pand %xmm5, %xmm6
2005; SSSE3-NEXT:    pandn %xmm8, %xmm5
2006; SSSE3-NEXT:    por %xmm6, %xmm5
2007; SSSE3-NEXT:    movdqa %xmm9, %xmm0
2008; SSSE3-NEXT:    pxor %xmm1, %xmm0
2009; SSSE3-NEXT:    movdqa %xmm10, %xmm4
2010; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm4
2011; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2012; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm0
2013; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2014; SSSE3-NEXT:    pand %xmm6, %xmm0
2015; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3]
2016; SSSE3-NEXT:    por %xmm0, %xmm7
2017; SSSE3-NEXT:    pand %xmm7, %xmm9
2018; SSSE3-NEXT:    pandn %xmm8, %xmm7
2019; SSSE3-NEXT:    por %xmm9, %xmm7
2020; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709518848,18446744073709518848]
2021; SSSE3-NEXT:    movdqa %xmm7, %xmm0
2022; SSSE3-NEXT:    pxor %xmm1, %xmm0
2023; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562035200,18446744071562035200]
2024; SSSE3-NEXT:    movdqa %xmm0, %xmm4
2025; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm4
2026; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2027; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
2028; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2029; SSSE3-NEXT:    pand %xmm6, %xmm0
2030; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2031; SSSE3-NEXT:    por %xmm0, %xmm4
2032; SSSE3-NEXT:    pand %xmm4, %xmm7
2033; SSSE3-NEXT:    pandn %xmm8, %xmm4
2034; SSSE3-NEXT:    por %xmm7, %xmm4
2035; SSSE3-NEXT:    movdqa %xmm5, %xmm0
2036; SSSE3-NEXT:    pxor %xmm1, %xmm0
2037; SSSE3-NEXT:    movdqa %xmm0, %xmm6
2038; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm6
2039; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
2040; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
2041; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2042; SSSE3-NEXT:    pand %xmm10, %xmm7
2043; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2044; SSSE3-NEXT:    por %xmm7, %xmm0
2045; SSSE3-NEXT:    pand %xmm0, %xmm5
2046; SSSE3-NEXT:    pandn %xmm8, %xmm0
2047; SSSE3-NEXT:    por %xmm5, %xmm0
2048; SSSE3-NEXT:    packssdw %xmm4, %xmm0
2049; SSSE3-NEXT:    movdqa %xmm3, %xmm4
2050; SSSE3-NEXT:    pxor %xmm1, %xmm4
2051; SSSE3-NEXT:    movdqa %xmm4, %xmm5
2052; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm5
2053; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
2054; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm4
2055; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2056; SSSE3-NEXT:    pand %xmm6, %xmm4
2057; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2058; SSSE3-NEXT:    por %xmm4, %xmm5
2059; SSSE3-NEXT:    pand %xmm5, %xmm3
2060; SSSE3-NEXT:    pandn %xmm8, %xmm5
2061; SSSE3-NEXT:    por %xmm3, %xmm5
2062; SSSE3-NEXT:    pxor %xmm2, %xmm1
2063; SSSE3-NEXT:    movdqa %xmm1, %xmm3
2064; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm3
2065; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2066; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
2067; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2068; SSSE3-NEXT:    pand %xmm4, %xmm1
2069; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2070; SSSE3-NEXT:    por %xmm1, %xmm3
2071; SSSE3-NEXT:    pand %xmm3, %xmm2
2072; SSSE3-NEXT:    pandn %xmm8, %xmm3
2073; SSSE3-NEXT:    por %xmm2, %xmm3
2074; SSSE3-NEXT:    packssdw %xmm5, %xmm3
2075; SSSE3-NEXT:    packssdw %xmm3, %xmm0
2076; SSSE3-NEXT:    retq
2077;
2078; SSE41-LABEL: trunc_ssat_v8i64_v8i16:
2079; SSE41:       # %bb.0:
2080; SSE41-NEXT:    movdqa (%rdi), %xmm10
2081; SSE41-NEXT:    movdqa 16(%rdi), %xmm9
2082; SSE41-NEXT:    movdqa 32(%rdi), %xmm3
2083; SSE41-NEXT:    movdqa 48(%rdi), %xmm5
2084; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [32767,32767]
2085; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
2086; SSE41-NEXT:    movdqa %xmm3, %xmm0
2087; SSE41-NEXT:    pxor %xmm2, %xmm0
2088; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [2147516415,2147516415]
2089; SSE41-NEXT:    movdqa %xmm4, %xmm7
2090; SSE41-NEXT:    pcmpeqd %xmm0, %xmm7
2091; SSE41-NEXT:    movdqa %xmm4, %xmm6
2092; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
2093; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
2094; SSE41-NEXT:    pand %xmm7, %xmm0
2095; SSE41-NEXT:    por %xmm6, %xmm0
2096; SSE41-NEXT:    movapd %xmm1, %xmm8
2097; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm8
2098; SSE41-NEXT:    movdqa %xmm5, %xmm0
2099; SSE41-NEXT:    pxor %xmm2, %xmm0
2100; SSE41-NEXT:    movdqa %xmm4, %xmm3
2101; SSE41-NEXT:    pcmpeqd %xmm0, %xmm3
2102; SSE41-NEXT:    movdqa %xmm4, %xmm6
2103; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
2104; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
2105; SSE41-NEXT:    pand %xmm3, %xmm0
2106; SSE41-NEXT:    por %xmm6, %xmm0
2107; SSE41-NEXT:    movapd %xmm1, %xmm11
2108; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm11
2109; SSE41-NEXT:    movdqa %xmm10, %xmm0
2110; SSE41-NEXT:    pxor %xmm2, %xmm0
2111; SSE41-NEXT:    movdqa %xmm4, %xmm3
2112; SSE41-NEXT:    pcmpeqd %xmm0, %xmm3
2113; SSE41-NEXT:    movdqa %xmm4, %xmm5
2114; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
2115; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
2116; SSE41-NEXT:    pand %xmm3, %xmm0
2117; SSE41-NEXT:    por %xmm5, %xmm0
2118; SSE41-NEXT:    movapd %xmm1, %xmm3
2119; SSE41-NEXT:    blendvpd %xmm0, %xmm10, %xmm3
2120; SSE41-NEXT:    movdqa %xmm9, %xmm0
2121; SSE41-NEXT:    pxor %xmm2, %xmm0
2122; SSE41-NEXT:    movdqa %xmm4, %xmm5
2123; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
2124; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
2125; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
2126; SSE41-NEXT:    pand %xmm5, %xmm0
2127; SSE41-NEXT:    por %xmm4, %xmm0
2128; SSE41-NEXT:    blendvpd %xmm0, %xmm9, %xmm1
2129; SSE41-NEXT:    movapd {{.*#+}} xmm5 = [18446744073709518848,18446744073709518848]
2130; SSE41-NEXT:    movapd %xmm1, %xmm4
2131; SSE41-NEXT:    xorpd %xmm2, %xmm4
2132; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200]
2133; SSE41-NEXT:    movapd %xmm4, %xmm7
2134; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
2135; SSE41-NEXT:    pcmpgtd %xmm6, %xmm4
2136; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
2137; SSE41-NEXT:    pand %xmm7, %xmm0
2138; SSE41-NEXT:    por %xmm4, %xmm0
2139; SSE41-NEXT:    movapd %xmm5, %xmm4
2140; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm4
2141; SSE41-NEXT:    movapd %xmm3, %xmm1
2142; SSE41-NEXT:    xorpd %xmm2, %xmm1
2143; SSE41-NEXT:    movapd %xmm1, %xmm7
2144; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
2145; SSE41-NEXT:    pcmpgtd %xmm6, %xmm1
2146; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
2147; SSE41-NEXT:    pand %xmm7, %xmm0
2148; SSE41-NEXT:    por %xmm1, %xmm0
2149; SSE41-NEXT:    movapd %xmm5, %xmm1
2150; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
2151; SSE41-NEXT:    packssdw %xmm4, %xmm1
2152; SSE41-NEXT:    movapd %xmm11, %xmm3
2153; SSE41-NEXT:    xorpd %xmm2, %xmm3
2154; SSE41-NEXT:    movapd %xmm3, %xmm4
2155; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
2156; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
2157; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
2158; SSE41-NEXT:    pand %xmm4, %xmm0
2159; SSE41-NEXT:    por %xmm3, %xmm0
2160; SSE41-NEXT:    movapd %xmm5, %xmm3
2161; SSE41-NEXT:    blendvpd %xmm0, %xmm11, %xmm3
2162; SSE41-NEXT:    xorpd %xmm8, %xmm2
2163; SSE41-NEXT:    movapd %xmm2, %xmm4
2164; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
2165; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
2166; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
2167; SSE41-NEXT:    pand %xmm4, %xmm0
2168; SSE41-NEXT:    por %xmm2, %xmm0
2169; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm5
2170; SSE41-NEXT:    packssdw %xmm3, %xmm5
2171; SSE41-NEXT:    packssdw %xmm5, %xmm1
2172; SSE41-NEXT:    movdqa %xmm1, %xmm0
2173; SSE41-NEXT:    retq
2174;
2175; AVX1-LABEL: trunc_ssat_v8i64_v8i16:
2176; AVX1:       # %bb.0:
2177; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
2178; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
2179; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm2
2180; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm3
2181; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [32767,32767]
2182; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm8
2183; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm6
2184; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm7
2185; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm4, %xmm5
2186; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
2187; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [18446744073709518848,18446744073709518848]
2188; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm0, %xmm9
2189; AVX1-NEXT:    vblendvpd %xmm7, %xmm1, %xmm4, %xmm1
2190; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm1, %xmm7
2191; AVX1-NEXT:    vblendvpd %xmm6, %xmm2, %xmm4, %xmm2
2192; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm2, %xmm6
2193; AVX1-NEXT:    vblendvpd %xmm8, %xmm3, %xmm4, %xmm3
2194; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm3, %xmm4
2195; AVX1-NEXT:    vblendvpd %xmm4, %xmm3, %xmm5, %xmm3
2196; AVX1-NEXT:    vblendvpd %xmm6, %xmm2, %xmm5, %xmm2
2197; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
2198; AVX1-NEXT:    vblendvpd %xmm7, %xmm1, %xmm5, %xmm1
2199; AVX1-NEXT:    vblendvpd %xmm9, %xmm0, %xmm5, %xmm0
2200; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2201; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
2202; AVX1-NEXT:    retq
2203;
2204; AVX2-LABEL: trunc_ssat_v8i64_v8i16:
2205; AVX2:       # %bb.0:
2206; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
2207; AVX2-NEXT:    vmovdqa 32(%rdi), %ymm1
2208; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [32767,32767,32767,32767]
2209; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
2210; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
2211; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
2212; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
2213; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848]
2214; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
2215; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
2216; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
2217; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
2218; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
2219; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
2220; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2221; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
2222; AVX2-NEXT:    vzeroupper
2223; AVX2-NEXT:    retq
2224;
2225; AVX512-LABEL: trunc_ssat_v8i64_v8i16:
2226; AVX512:       # %bb.0:
2227; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
2228; AVX512-NEXT:    vpmovsqw %zmm0, %xmm0
2229; AVX512-NEXT:    vzeroupper
2230; AVX512-NEXT:    retq
2231;
2232; SKX-LABEL: trunc_ssat_v8i64_v8i16:
2233; SKX:       # %bb.0:
2234; SKX-NEXT:    vmovdqa (%rdi), %ymm0
2235; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
2236; SKX-NEXT:    vpmovsqw %ymm1, %xmm1
2237; SKX-NEXT:    vpmovsqw %ymm0, %xmm0
2238; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2239; SKX-NEXT:    vzeroupper
2240; SKX-NEXT:    retq
2241  %a0 = load <8 x i64>, <8 x i64>* %p0
2242  %1 = icmp slt <8 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767>
2243  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767>
2244  %3 = icmp sgt <8 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768>
2245  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768>
2246  %5 = trunc <8 x i64> %4 to <8 x i16>
2247  ret <8 x i16> %5
2248}
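
; Why the packssdw stages in the SSE checks above are lossless: after the clamp every
; 64-bit lane holds a value in [-32768, 32767], so its low dword is the value itself and
; its high dword is pure sign extension. A small worked example (illustrative value):
;
;   lane          = 0xFFFFFFFFFFFFFFFB    ; i64 -5 after clamping
;   1st packssdw  -> words (0xFFFB, 0xFFFF) = (value, sign); re-read as dword 0xFFFFFFFB = i32 -5
;   2nd packssdw  -> word 0xFFFB = i16 -5, exactly the truncated result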
2249
2250define <4 x i16> @trunc_ssat_v4i32_v4i16(<4 x i32> %a0) {
2251; SSE-LABEL: trunc_ssat_v4i32_v4i16:
2252; SSE:       # %bb.0:
2253; SSE-NEXT:    packssdw %xmm0, %xmm0
2254; SSE-NEXT:    retq
2255;
2256; AVX-LABEL: trunc_ssat_v4i32_v4i16:
2257; AVX:       # %bb.0:
2258; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2259; AVX-NEXT:    retq
2260;
2261; AVX512-LABEL: trunc_ssat_v4i32_v4i16:
2262; AVX512:       # %bb.0:
2263; AVX512-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2264; AVX512-NEXT:    retq
2265;
2266; SKX-LABEL: trunc_ssat_v4i32_v4i16:
2267; SKX:       # %bb.0:
2268; SKX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2269; SKX-NEXT:    retq
2270  %1 = icmp slt <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
2271  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
2272  %3 = icmp sgt <4 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
2273  %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
2274  %5 = trunc <4 x i32> %4 to <4 x i16>
2275  ret <4 x i16> %5
2276}
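
; For the vXi32 -> vXi16 cases the clamp bounds match packssdw's own saturation, so a
; single packssdw/vpackssdw per 128 bits is enough, as the checks above show. A scalar C
; sketch of that per-element operation (the helper name is made up for illustration):
;
;   static inline int16_t ssat_i32_to_i16(int32_t x) {
;     return (int16_t)(x > 32767 ? 32767 : x < -32768 ? -32768 : x);
;   }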
2277
2278define void @trunc_ssat_v4i32_v4i16_store(<4 x i32> %a0, <4 x i16> *%p1) {
2279; SSE-LABEL: trunc_ssat_v4i32_v4i16_store:
2280; SSE:       # %bb.0:
2281; SSE-NEXT:    packssdw %xmm0, %xmm0
2282; SSE-NEXT:    movq %xmm0, (%rdi)
2283; SSE-NEXT:    retq
2284;
2285; AVX-LABEL: trunc_ssat_v4i32_v4i16_store:
2286; AVX:       # %bb.0:
2287; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2288; AVX-NEXT:    vmovq %xmm0, (%rdi)
2289; AVX-NEXT:    retq
2290;
2291; AVX512F-LABEL: trunc_ssat_v4i32_v4i16_store:
2292; AVX512F:       # %bb.0:
2293; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2294; AVX512F-NEXT:    vmovq %xmm0, (%rdi)
2295; AVX512F-NEXT:    retq
2296;
2297; AVX512VL-LABEL: trunc_ssat_v4i32_v4i16_store:
2298; AVX512VL:       # %bb.0:
2299; AVX512VL-NEXT:    vpmovsdw %xmm0, (%rdi)
2300; AVX512VL-NEXT:    retq
2301;
2302; AVX512BW-LABEL: trunc_ssat_v4i32_v4i16_store:
2303; AVX512BW:       # %bb.0:
2304; AVX512BW-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2305; AVX512BW-NEXT:    vmovq %xmm0, (%rdi)
2306; AVX512BW-NEXT:    retq
2307;
2308; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i16_store:
2309; AVX512BWVL:       # %bb.0:
2310; AVX512BWVL-NEXT:    vpmovsdw %xmm0, (%rdi)
2311; AVX512BWVL-NEXT:    retq
2312;
2313; SKX-LABEL: trunc_ssat_v4i32_v4i16_store:
2314; SKX:       # %bb.0:
2315; SKX-NEXT:    vpmovsdw %xmm0, (%rdi)
2316; SKX-NEXT:    retq
2317  %1 = icmp slt <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
2318  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
2319  %3 = icmp sgt <4 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
2320  %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
2321  %5 = trunc <4 x i32> %4 to <4 x i16>
2322  store <4 x i16> %5, <4 x i16> *%p1
2323  ret void
2324}
2325
2326define <8 x i16> @trunc_ssat_v8i32_v8i16(<8 x i32> %a0) {
2327; SSE-LABEL: trunc_ssat_v8i32_v8i16:
2328; SSE:       # %bb.0:
2329; SSE-NEXT:    packssdw %xmm1, %xmm0
2330; SSE-NEXT:    retq
2331;
2332; AVX1-LABEL: trunc_ssat_v8i32_v8i16:
2333; AVX1:       # %bb.0:
2334; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2335; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2336; AVX1-NEXT:    vzeroupper
2337; AVX1-NEXT:    retq
2338;
2339; AVX2-LABEL: trunc_ssat_v8i32_v8i16:
2340; AVX2:       # %bb.0:
2341; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
2342; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2343; AVX2-NEXT:    vzeroupper
2344; AVX2-NEXT:    retq
2345;
2346; AVX512F-LABEL: trunc_ssat_v8i32_v8i16:
2347; AVX512F:       # %bb.0:
2348; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
2349; AVX512F-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2350; AVX512F-NEXT:    vzeroupper
2351; AVX512F-NEXT:    retq
2352;
2353; AVX512VL-LABEL: trunc_ssat_v8i32_v8i16:
2354; AVX512VL:       # %bb.0:
2355; AVX512VL-NEXT:    vpmovsdw %ymm0, %xmm0
2356; AVX512VL-NEXT:    vzeroupper
2357; AVX512VL-NEXT:    retq
2358;
2359; AVX512BW-LABEL: trunc_ssat_v8i32_v8i16:
2360; AVX512BW:       # %bb.0:
2361; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
2362; AVX512BW-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2363; AVX512BW-NEXT:    vzeroupper
2364; AVX512BW-NEXT:    retq
2365;
2366; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i16:
2367; AVX512BWVL:       # %bb.0:
2368; AVX512BWVL-NEXT:    vpmovsdw %ymm0, %xmm0
2369; AVX512BWVL-NEXT:    vzeroupper
2370; AVX512BWVL-NEXT:    retq
2371;
2372; SKX-LABEL: trunc_ssat_v8i32_v8i16:
2373; SKX:       # %bb.0:
2374; SKX-NEXT:    vpmovsdw %ymm0, %xmm0
2375; SKX-NEXT:    vzeroupper
2376; SKX-NEXT:    retq
2377  %1 = icmp slt <8 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
2378  %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
2379  %3 = icmp sgt <8 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
2380  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
2381  %5 = trunc <8 x i32> %4 to <8 x i16>
2382  ret <8 x i16> %5
2383}
2384
2385define <16 x i16> @trunc_ssat_v16i32_v16i16(<16 x i32>* %p0) "min-legal-vector-width"="256" {
2386; SSE-LABEL: trunc_ssat_v16i32_v16i16:
2387; SSE:       # %bb.0:
2388; SSE-NEXT:    movdqa (%rdi), %xmm0
2389; SSE-NEXT:    movdqa 32(%rdi), %xmm1
2390; SSE-NEXT:    packssdw 16(%rdi), %xmm0
2391; SSE-NEXT:    packssdw 48(%rdi), %xmm1
2392; SSE-NEXT:    retq
2393;
2394; AVX1-LABEL: trunc_ssat_v16i32_v16i16:
2395; AVX1:       # %bb.0:
2396; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
2397; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
2398; AVX1-NEXT:    vpackssdw 48(%rdi), %xmm1, %xmm1
2399; AVX1-NEXT:    vpackssdw 16(%rdi), %xmm0, %xmm0
2400; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2401; AVX1-NEXT:    retq
2402;
2403; AVX2-LABEL: trunc_ssat_v16i32_v16i16:
2404; AVX2:       # %bb.0:
2405; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
2406; AVX2-NEXT:    vpackssdw 32(%rdi), %ymm0, %ymm0
2407; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2408; AVX2-NEXT:    retq
2409;
2410; AVX512-LABEL: trunc_ssat_v16i32_v16i16:
2411; AVX512:       # %bb.0:
2412; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
2413; AVX512-NEXT:    vpmovsdw %zmm0, %ymm0
2414; AVX512-NEXT:    retq
2415;
2416; SKX-LABEL: trunc_ssat_v16i32_v16i16:
2417; SKX:       # %bb.0:
2418; SKX-NEXT:    vmovdqa (%rdi), %ymm0
2419; SKX-NEXT:    vpackssdw 32(%rdi), %ymm0, %ymm0
2420; SKX-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2421; SKX-NEXT:    retq
2422  %a0 = load <16 x i32>, <16 x i32>* %p0
2423  %1 = icmp slt <16 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
2424  %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
2425  %3 = icmp sgt <16 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
2426  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
2427  %5 = trunc <16 x i32> %4 to <16 x i16>
2428  ret <16 x i16> %5
2429}
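
; Note on the AVX2/SKX lowering above: vpackssdw with ymm operands packs within each
; 128-bit lane, so after "vpackssdw 32(%rdi), %ymm0, %ymm0" the 64-bit quarters are
; ordered [A.lo, B.lo, A.hi, B.hi] (A from (%rdi), B from 32(%rdi)); the vpermq pattern
; [0,2,1,3] restores [A.lo, A.hi, B.lo, B.hi], i.e. A's words followed by B's words.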
2430
2431;
2432; Signed saturation truncation to vXi8
2433;
2434
2435define <2 x i8> @trunc_ssat_v2i64_v2i8(<2 x i64> %a0) {
2436; SSE2-LABEL: trunc_ssat_v2i64_v2i8:
2437; SSE2:       # %bb.0:
2438; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
2439; SSE2-NEXT:    movdqa %xmm0, %xmm2
2440; SSE2-NEXT:    pxor %xmm1, %xmm2
2441; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483775,2147483775]
2442; SSE2-NEXT:    movdqa %xmm3, %xmm4
2443; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
2444; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2445; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
2446; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2447; SSE2-NEXT:    pand %xmm5, %xmm2
2448; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2449; SSE2-NEXT:    por %xmm2, %xmm3
2450; SSE2-NEXT:    pand %xmm3, %xmm0
2451; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm3
2452; SSE2-NEXT:    por %xmm0, %xmm3
2453; SSE2-NEXT:    pxor %xmm3, %xmm1
2454; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840]
2455; SSE2-NEXT:    movdqa %xmm1, %xmm2
2456; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
2457; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2458; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
2459; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2460; SSE2-NEXT:    pand %xmm4, %xmm1
2461; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2462; SSE2-NEXT:    por %xmm1, %xmm0
2463; SSE2-NEXT:    pand %xmm0, %xmm3
2464; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
2465; SSE2-NEXT:    por %xmm3, %xmm0
2466; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
2467; SSE2-NEXT:    packuswb %xmm0, %xmm0
2468; SSE2-NEXT:    packuswb %xmm0, %xmm0
2469; SSE2-NEXT:    packuswb %xmm0, %xmm0
2470; SSE2-NEXT:    retq
2471;
2472; SSSE3-LABEL: trunc_ssat_v2i64_v2i8:
2473; SSSE3:       # %bb.0:
2474; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
2475; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2476; SSSE3-NEXT:    pxor %xmm1, %xmm2
2477; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2147483775,2147483775]
2478; SSSE3-NEXT:    movdqa %xmm3, %xmm4
2479; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm4
2480; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2481; SSSE3-NEXT:    pcmpeqd %xmm3, %xmm2
2482; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2483; SSSE3-NEXT:    pand %xmm5, %xmm2
2484; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2485; SSSE3-NEXT:    por %xmm2, %xmm3
2486; SSSE3-NEXT:    pand %xmm3, %xmm0
2487; SSSE3-NEXT:    pandn {{.*}}(%rip), %xmm3
2488; SSSE3-NEXT:    por %xmm0, %xmm3
2489; SSSE3-NEXT:    pxor %xmm3, %xmm1
2490; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840]
2491; SSSE3-NEXT:    movdqa %xmm1, %xmm2
2492; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
2493; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2494; SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
2495; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2496; SSSE3-NEXT:    pand %xmm4, %xmm1
2497; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2498; SSSE3-NEXT:    por %xmm1, %xmm0
2499; SSSE3-NEXT:    pand %xmm0, %xmm3
2500; SSSE3-NEXT:    pandn {{.*}}(%rip), %xmm0
2501; SSSE3-NEXT:    por %xmm3, %xmm0
2502; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2503; SSSE3-NEXT:    retq
2504;
2505; SSE41-LABEL: trunc_ssat_v2i64_v2i8:
2506; SSE41:       # %bb.0:
2507; SSE41-NEXT:    movdqa %xmm0, %xmm1
2508; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [127,127]
2509; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
2510; SSE41-NEXT:    pxor %xmm3, %xmm0
2511; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [2147483775,2147483775]
2512; SSE41-NEXT:    movdqa %xmm4, %xmm5
2513; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
2514; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
2515; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
2516; SSE41-NEXT:    pand %xmm5, %xmm0
2517; SSE41-NEXT:    por %xmm4, %xmm0
2518; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
2519; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2520; SSE41-NEXT:    pxor %xmm2, %xmm3
2521; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840]
2522; SSE41-NEXT:    movdqa %xmm3, %xmm4
2523; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
2524; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
2525; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
2526; SSE41-NEXT:    pand %xmm4, %xmm0
2527; SSE41-NEXT:    por %xmm3, %xmm0
2528; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
2529; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2530; SSE41-NEXT:    movdqa %xmm1, %xmm0
2531; SSE41-NEXT:    retq
2532;
2533; AVX-LABEL: trunc_ssat_v2i64_v2i8:
2534; AVX:       # %bb.0:
2535; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127]
2536; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
2537; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
2538; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2539; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
2540; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
2541; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2542; AVX-NEXT:    retq
2543;
2544; AVX512F-LABEL: trunc_ssat_v2i64_v2i8:
2545; AVX512F:       # %bb.0:
2546; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2547; AVX512F-NEXT:    vpmovsqb %zmm0, %xmm0
2548; AVX512F-NEXT:    vzeroupper
2549; AVX512F-NEXT:    retq
2550;
2551; AVX512VL-LABEL: trunc_ssat_v2i64_v2i8:
2552; AVX512VL:       # %bb.0:
2553; AVX512VL-NEXT:    vpmovsqb %xmm0, %xmm0
2554; AVX512VL-NEXT:    retq
2555;
2556; AVX512BW-LABEL: trunc_ssat_v2i64_v2i8:
2557; AVX512BW:       # %bb.0:
2558; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2559; AVX512BW-NEXT:    vpmovsqb %zmm0, %xmm0
2560; AVX512BW-NEXT:    vzeroupper
2561; AVX512BW-NEXT:    retq
2562;
2563; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i8:
2564; AVX512BWVL:       # %bb.0:
2565; AVX512BWVL-NEXT:    vpmovsqb %xmm0, %xmm0
2566; AVX512BWVL-NEXT:    retq
2567;
2568; SKX-LABEL: trunc_ssat_v2i64_v2i8:
2569; SKX:       # %bb.0:
2570; SKX-NEXT:    vpmovsqb %xmm0, %xmm0
2571; SKX-NEXT:    retq
2572  %1 = icmp slt <2 x i64> %a0, <i64 127, i64 127>
2573  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 127, i64 127>
2574  %3 = icmp sgt <2 x i64> %2, <i64 -128, i64 -128>
2575  %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -128, i64 -128>
2576  %5 = trunc <2 x i64> %4 to <2 x i8>
2577  ret <2 x i8> %5
2578}
2579
2580define void @trunc_ssat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8> *%p1) {
2581; SSE2-LABEL: trunc_ssat_v2i64_v2i8_store:
2582; SSE2:       # %bb.0:
2583; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
2584; SSE2-NEXT:    movdqa %xmm0, %xmm2
2585; SSE2-NEXT:    pxor %xmm1, %xmm2
2586; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483775,2147483775]
2587; SSE2-NEXT:    movdqa %xmm3, %xmm4
2588; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
2589; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2590; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
2591; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2592; SSE2-NEXT:    pand %xmm5, %xmm2
2593; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2594; SSE2-NEXT:    por %xmm2, %xmm3
2595; SSE2-NEXT:    pand %xmm3, %xmm0
2596; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm3
2597; SSE2-NEXT:    por %xmm0, %xmm3
2598; SSE2-NEXT:    pxor %xmm3, %xmm1
2599; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840]
2600; SSE2-NEXT:    movdqa %xmm1, %xmm2
2601; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
2602; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2603; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
2604; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2605; SSE2-NEXT:    pand %xmm4, %xmm0
2606; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
2607; SSE2-NEXT:    por %xmm0, %xmm1
2608; SSE2-NEXT:    pand %xmm1, %xmm3
2609; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm1
2610; SSE2-NEXT:    por %xmm3, %xmm1
2611; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2612; SSE2-NEXT:    packuswb %xmm1, %xmm1
2613; SSE2-NEXT:    packuswb %xmm1, %xmm1
2614; SSE2-NEXT:    packuswb %xmm1, %xmm1
2615; SSE2-NEXT:    movd %xmm1, %eax
2616; SSE2-NEXT:    movw %ax, (%rdi)
2617; SSE2-NEXT:    retq
2618;
2619; SSSE3-LABEL: trunc_ssat_v2i64_v2i8_store:
2620; SSSE3:       # %bb.0:
2621; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
2622; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2623; SSSE3-NEXT:    pxor %xmm1, %xmm2
2624; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2147483775,2147483775]
2625; SSSE3-NEXT:    movdqa %xmm3, %xmm4
2626; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm4
2627; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2628; SSSE3-NEXT:    pcmpeqd %xmm3, %xmm2
2629; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2630; SSSE3-NEXT:    pand %xmm5, %xmm2
2631; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2632; SSSE3-NEXT:    por %xmm2, %xmm3
2633; SSSE3-NEXT:    pand %xmm3, %xmm0
2634; SSSE3-NEXT:    pandn {{.*}}(%rip), %xmm3
2635; SSSE3-NEXT:    por %xmm0, %xmm3
2636; SSSE3-NEXT:    pxor %xmm3, %xmm1
2637; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840]
2638; SSSE3-NEXT:    movdqa %xmm1, %xmm2
2639; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
2640; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2641; SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
2642; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2643; SSSE3-NEXT:    pand %xmm4, %xmm0
2644; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
2645; SSSE3-NEXT:    por %xmm0, %xmm1
2646; SSSE3-NEXT:    pand %xmm1, %xmm3
2647; SSSE3-NEXT:    pandn {{.*}}(%rip), %xmm1
2648; SSSE3-NEXT:    por %xmm3, %xmm1
2649; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2650; SSSE3-NEXT:    movd %xmm1, %eax
2651; SSSE3-NEXT:    movw %ax, (%rdi)
2652; SSSE3-NEXT:    retq
2653;
2654; SSE41-LABEL: trunc_ssat_v2i64_v2i8_store:
2655; SSE41:       # %bb.0:
2656; SSE41-NEXT:    movdqa %xmm0, %xmm1
2657; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [127,127]
2658; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
2659; SSE41-NEXT:    pxor %xmm3, %xmm0
2660; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [2147483775,2147483775]
2661; SSE41-NEXT:    movdqa %xmm4, %xmm5
2662; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
2663; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
2664; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
2665; SSE41-NEXT:    pand %xmm5, %xmm0
2666; SSE41-NEXT:    por %xmm4, %xmm0
2667; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
2668; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2669; SSE41-NEXT:    pxor %xmm2, %xmm3
2670; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840]
2671; SSE41-NEXT:    movdqa %xmm3, %xmm4
2672; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
2673; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
2674; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
2675; SSE41-NEXT:    pand %xmm4, %xmm0
2676; SSE41-NEXT:    por %xmm3, %xmm0
2677; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
2678; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2679; SSE41-NEXT:    pextrw $0, %xmm1, (%rdi)
2680; SSE41-NEXT:    retq
2681;
2682; AVX-LABEL: trunc_ssat_v2i64_v2i8_store:
2683; AVX:       # %bb.0:
2684; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127]
2685; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
2686; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
2687; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2688; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
2689; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
2690; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2691; AVX-NEXT:    vpextrw $0, %xmm0, (%rdi)
2692; AVX-NEXT:    retq
2693;
2694; AVX512F-LABEL: trunc_ssat_v2i64_v2i8_store:
2695; AVX512F:       # %bb.0:
2696; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2697; AVX512F-NEXT:    vpmovsqb %zmm0, %xmm0
2698; AVX512F-NEXT:    vpextrw $0, %xmm0, (%rdi)
2699; AVX512F-NEXT:    vzeroupper
2700; AVX512F-NEXT:    retq
2701;
2702; AVX512VL-LABEL: trunc_ssat_v2i64_v2i8_store:
2703; AVX512VL:       # %bb.0:
2704; AVX512VL-NEXT:    vpmovsqb %xmm0, (%rdi)
2705; AVX512VL-NEXT:    retq
2706;
2707; AVX512BW-LABEL: trunc_ssat_v2i64_v2i8_store:
2708; AVX512BW:       # %bb.0:
2709; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2710; AVX512BW-NEXT:    vpmovsqb %zmm0, %xmm0
2711; AVX512BW-NEXT:    vpextrw $0, %xmm0, (%rdi)
2712; AVX512BW-NEXT:    vzeroupper
2713; AVX512BW-NEXT:    retq
2714;
2715; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i8_store:
2716; AVX512BWVL:       # %bb.0:
2717; AVX512BWVL-NEXT:    vpmovsqb %xmm0, (%rdi)
2718; AVX512BWVL-NEXT:    retq
2719;
2720; SKX-LABEL: trunc_ssat_v2i64_v2i8_store:
2721; SKX:       # %bb.0:
2722; SKX-NEXT:    vpmovsqb %xmm0, (%rdi)
2723; SKX-NEXT:    retq
2724  %1 = icmp slt <2 x i64> %a0, <i64 127, i64 127>
2725  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 127, i64 127>
2726  %3 = icmp sgt <2 x i64> %2, <i64 -128, i64 -128>
2727  %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -128, i64 -128>
2728  %5 = trunc <2 x i64> %4 to <2 x i8>
2729  store <2 x i8> %5, <2 x i8> *%p1
2730  ret void
2731}
2732
2733define <4 x i8> @trunc_ssat_v4i64_v4i8(<4 x i64> %a0) {
2734; SSE2-LABEL: trunc_ssat_v4i64_v4i8:
2735; SSE2:       # %bb.0:
2736; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
2737; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
2738; SSE2-NEXT:    movdqa %xmm1, %xmm3
2739; SSE2-NEXT:    pxor %xmm2, %xmm3
2740; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2147483775,2147483775]
2741; SSE2-NEXT:    movdqa %xmm5, %xmm6
2742; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
2743; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2744; SSE2-NEXT:    pcmpeqd %xmm5, %xmm3
2745; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
2746; SSE2-NEXT:    pand %xmm7, %xmm4
2747; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
2748; SSE2-NEXT:    por %xmm4, %xmm3
2749; SSE2-NEXT:    pand %xmm3, %xmm1
2750; SSE2-NEXT:    pandn %xmm8, %xmm3
2751; SSE2-NEXT:    por %xmm1, %xmm3
2752; SSE2-NEXT:    movdqa %xmm0, %xmm1
2753; SSE2-NEXT:    pxor %xmm2, %xmm1
2754; SSE2-NEXT:    movdqa %xmm5, %xmm4
2755; SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
2756; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2757; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
2758; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2759; SSE2-NEXT:    pand %xmm6, %xmm1
2760; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2761; SSE2-NEXT:    por %xmm1, %xmm4
2762; SSE2-NEXT:    pand %xmm4, %xmm0
2763; SSE2-NEXT:    pandn %xmm8, %xmm4
2764; SSE2-NEXT:    por %xmm0, %xmm4
2765; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
2766; SSE2-NEXT:    movdqa %xmm4, %xmm0
2767; SSE2-NEXT:    pxor %xmm2, %xmm0
2768; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [18446744071562067840,18446744071562067840]
2769; SSE2-NEXT:    movdqa %xmm0, %xmm6
2770; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
2771; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2772; SSE2-NEXT:    pcmpeqd %xmm5, %xmm0
2773; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2774; SSE2-NEXT:    pand %xmm7, %xmm1
2775; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2776; SSE2-NEXT:    por %xmm1, %xmm0
2777; SSE2-NEXT:    pand %xmm0, %xmm4
2778; SSE2-NEXT:    pandn %xmm8, %xmm0
2779; SSE2-NEXT:    por %xmm4, %xmm0
2780; SSE2-NEXT:    pxor %xmm3, %xmm2
2781; SSE2-NEXT:    movdqa %xmm2, %xmm1
2782; SSE2-NEXT:    pcmpgtd %xmm5, %xmm1
2783; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
2784; SSE2-NEXT:    pcmpeqd %xmm5, %xmm2
2785; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2786; SSE2-NEXT:    pand %xmm4, %xmm2
2787; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2788; SSE2-NEXT:    por %xmm2, %xmm1
2789; SSE2-NEXT:    pand %xmm1, %xmm3
2790; SSE2-NEXT:    pandn %xmm8, %xmm1
2791; SSE2-NEXT:    por %xmm3, %xmm1
2792; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0]
2793; SSE2-NEXT:    pand %xmm2, %xmm1
2794; SSE2-NEXT:    pand %xmm2, %xmm0
2795; SSE2-NEXT:    packuswb %xmm1, %xmm0
2796; SSE2-NEXT:    packuswb %xmm0, %xmm0
2797; SSE2-NEXT:    packuswb %xmm0, %xmm0
2798; SSE2-NEXT:    retq
2799;
2800; SSSE3-LABEL: trunc_ssat_v4i64_v4i8:
2801; SSSE3:       # %bb.0:
2802; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
2803; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
2804; SSSE3-NEXT:    movdqa %xmm1, %xmm3
2805; SSSE3-NEXT:    pxor %xmm2, %xmm3
2806; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [2147483775,2147483775]
2807; SSSE3-NEXT:    movdqa %xmm5, %xmm6
2808; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm6
2809; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2810; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm3
2811; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
2812; SSSE3-NEXT:    pand %xmm7, %xmm4
2813; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
2814; SSSE3-NEXT:    por %xmm4, %xmm3
2815; SSSE3-NEXT:    pand %xmm3, %xmm1
2816; SSSE3-NEXT:    pandn %xmm8, %xmm3
2817; SSSE3-NEXT:    por %xmm1, %xmm3
2818; SSSE3-NEXT:    movdqa %xmm0, %xmm1
2819; SSSE3-NEXT:    pxor %xmm2, %xmm1
2820; SSSE3-NEXT:    movdqa %xmm5, %xmm4
2821; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm4
2822; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2823; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm1
2824; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2825; SSSE3-NEXT:    pand %xmm6, %xmm1
2826; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2827; SSSE3-NEXT:    por %xmm1, %xmm4
2828; SSSE3-NEXT:    pand %xmm4, %xmm0
2829; SSSE3-NEXT:    pandn %xmm8, %xmm4
2830; SSSE3-NEXT:    por %xmm0, %xmm4
2831; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
2832; SSSE3-NEXT:    movdqa %xmm4, %xmm0
2833; SSSE3-NEXT:    pxor %xmm2, %xmm0
2834; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [18446744071562067840,18446744071562067840]
2835; SSSE3-NEXT:    movdqa %xmm0, %xmm6
2836; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm6
2837; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2838; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm0
2839; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2840; SSSE3-NEXT:    pand %xmm7, %xmm1
2841; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2842; SSSE3-NEXT:    por %xmm1, %xmm0
2843; SSSE3-NEXT:    pand %xmm0, %xmm4
2844; SSSE3-NEXT:    pandn %xmm8, %xmm0
2845; SSSE3-NEXT:    por %xmm4, %xmm0
2846; SSSE3-NEXT:    pxor %xmm3, %xmm2
2847; SSSE3-NEXT:    movdqa %xmm2, %xmm1
2848; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm1
2849; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
2850; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm2
2851; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2852; SSSE3-NEXT:    pand %xmm4, %xmm2
2853; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2854; SSSE3-NEXT:    por %xmm2, %xmm1
2855; SSSE3-NEXT:    pand %xmm1, %xmm3
2856; SSSE3-NEXT:    pandn %xmm8, %xmm1
2857; SSSE3-NEXT:    por %xmm3, %xmm1
2858; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
2859; SSSE3-NEXT:    pshufb %xmm2, %xmm1
2860; SSSE3-NEXT:    pshufb %xmm2, %xmm0
2861; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2862; SSSE3-NEXT:    retq
2863;
2864; SSE41-LABEL: trunc_ssat_v4i64_v4i8:
2865; SSE41:       # %bb.0:
2866; SSE41-NEXT:    movdqa %xmm0, %xmm2
2867; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [127,127]
2868; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
2869; SSE41-NEXT:    movdqa %xmm1, %xmm0
2870; SSE41-NEXT:    pxor %xmm3, %xmm0
2871; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [2147483775,2147483775]
2872; SSE41-NEXT:    movdqa %xmm6, %xmm5
2873; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
2874; SSE41-NEXT:    movdqa %xmm6, %xmm7
2875; SSE41-NEXT:    pcmpgtd %xmm0, %xmm7
2876; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
2877; SSE41-NEXT:    pand %xmm5, %xmm0
2878; SSE41-NEXT:    por %xmm7, %xmm0
2879; SSE41-NEXT:    movapd %xmm4, %xmm5
2880; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm5
2881; SSE41-NEXT:    movdqa %xmm2, %xmm0
2882; SSE41-NEXT:    pxor %xmm3, %xmm0
2883; SSE41-NEXT:    movdqa %xmm6, %xmm1
2884; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
2885; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
2886; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
2887; SSE41-NEXT:    pand %xmm1, %xmm0
2888; SSE41-NEXT:    por %xmm6, %xmm0
2889; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm4
2890; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [18446744073709551488,18446744073709551488]
2891; SSE41-NEXT:    movapd %xmm4, %xmm1
2892; SSE41-NEXT:    xorpd %xmm3, %xmm1
2893; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840]
2894; SSE41-NEXT:    movapd %xmm1, %xmm7
2895; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
2896; SSE41-NEXT:    pcmpgtd %xmm6, %xmm1
2897; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
2898; SSE41-NEXT:    pand %xmm7, %xmm0
2899; SSE41-NEXT:    por %xmm1, %xmm0
2900; SSE41-NEXT:    movapd %xmm2, %xmm1
2901; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm1
2902; SSE41-NEXT:    xorpd %xmm5, %xmm3
2903; SSE41-NEXT:    movapd %xmm3, %xmm4
2904; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
2905; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
2906; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
2907; SSE41-NEXT:    pand %xmm4, %xmm0
2908; SSE41-NEXT:    por %xmm3, %xmm0
2909; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm2
2910; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
2911; SSE41-NEXT:    pshufb %xmm0, %xmm2
2912; SSE41-NEXT:    pshufb %xmm0, %xmm1
2913; SSE41-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2914; SSE41-NEXT:    movdqa %xmm1, %xmm0
2915; SSE41-NEXT:    retq
2916;
2917; AVX1-LABEL: trunc_ssat_v4i64_v4i8:
2918; AVX1:       # %bb.0:
2919; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2920; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127]
2921; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm3
2922; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm4
2923; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm2, %xmm0
2924; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488]
2925; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
2926; AVX1-NEXT:    vblendvpd %xmm3, %xmm1, %xmm2, %xmm1
2927; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm2
2928; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm4, %xmm1
2929; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
2930; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2931; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
2932; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2933; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2934; AVX1-NEXT:    vzeroupper
2935; AVX1-NEXT:    retq
2936;
2937; AVX2-LABEL: trunc_ssat_v4i64_v4i8:
2938; AVX2:       # %bb.0:
2939; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [127,127,127,127]
2940; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
2941; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
2942; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
2943; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
2944; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
2945; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
2946; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
2947; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2948; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2949; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2950; AVX2-NEXT:    vzeroupper
2951; AVX2-NEXT:    retq
2952;
2953; AVX512F-LABEL: trunc_ssat_v4i64_v4i8:
2954; AVX512F:       # %bb.0:
2955; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2956; AVX512F-NEXT:    vpmovsqb %zmm0, %xmm0
2957; AVX512F-NEXT:    vzeroupper
2958; AVX512F-NEXT:    retq
2959;
2960; AVX512VL-LABEL: trunc_ssat_v4i64_v4i8:
2961; AVX512VL:       # %bb.0:
2962; AVX512VL-NEXT:    vpmovsqb %ymm0, %xmm0
2963; AVX512VL-NEXT:    vzeroupper
2964; AVX512VL-NEXT:    retq
2965;
2966; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8:
2967; AVX512BW:       # %bb.0:
2968; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2969; AVX512BW-NEXT:    vpmovsqb %zmm0, %xmm0
2970; AVX512BW-NEXT:    vzeroupper
2971; AVX512BW-NEXT:    retq
2972;
2973; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i8:
2974; AVX512BWVL:       # %bb.0:
2975; AVX512BWVL-NEXT:    vpmovsqb %ymm0, %xmm0
2976; AVX512BWVL-NEXT:    vzeroupper
2977; AVX512BWVL-NEXT:    retq
2978;
2979; SKX-LABEL: trunc_ssat_v4i64_v4i8:
2980; SKX:       # %bb.0:
2981; SKX-NEXT:    vpmovsqb %ymm0, %xmm0
2982; SKX-NEXT:    vzeroupper
2983; SKX-NEXT:    retq
2984  %1 = icmp slt <4 x i64> %a0, <i64 127, i64 127, i64 127, i64 127>
2985  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 127, i64 127, i64 127, i64 127>
2986  %3 = icmp sgt <4 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128>
2987  %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>
2988  %5 = trunc <4 x i64> %4 to <4 x i8>
2989  ret <4 x i8> %5
2990}
2991
2992define void @trunc_ssat_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) {
2993; SSE2-LABEL: trunc_ssat_v4i64_v4i8_store:
2994; SSE2:       # %bb.0:
2995; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
2996; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
2997; SSE2-NEXT:    movdqa %xmm1, %xmm3
2998; SSE2-NEXT:    pxor %xmm2, %xmm3
2999; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2147483775,2147483775]
3000; SSE2-NEXT:    movdqa %xmm5, %xmm6
3001; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
3002; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
3003; SSE2-NEXT:    pcmpeqd %xmm5, %xmm3
3004; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
3005; SSE2-NEXT:    pand %xmm7, %xmm4
3006; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
3007; SSE2-NEXT:    por %xmm4, %xmm3
3008; SSE2-NEXT:    pand %xmm3, %xmm1
3009; SSE2-NEXT:    pandn %xmm8, %xmm3
3010; SSE2-NEXT:    por %xmm1, %xmm3
3011; SSE2-NEXT:    movdqa %xmm0, %xmm1
3012; SSE2-NEXT:    pxor %xmm2, %xmm1
3013; SSE2-NEXT:    movdqa %xmm5, %xmm4
3014; SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
3015; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
3016; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
3017; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3018; SSE2-NEXT:    pand %xmm6, %xmm1
3019; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3020; SSE2-NEXT:    por %xmm1, %xmm4
3021; SSE2-NEXT:    pand %xmm4, %xmm0
3022; SSE2-NEXT:    pandn %xmm8, %xmm4
3023; SSE2-NEXT:    por %xmm0, %xmm4
3024; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
3025; SSE2-NEXT:    movdqa %xmm4, %xmm0
3026; SSE2-NEXT:    pxor %xmm2, %xmm0
3027; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [18446744071562067840,18446744071562067840]
3028; SSE2-NEXT:    movdqa %xmm0, %xmm6
3029; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
3030; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
3031; SSE2-NEXT:    pcmpeqd %xmm5, %xmm0
3032; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
3033; SSE2-NEXT:    pand %xmm7, %xmm1
3034; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
3035; SSE2-NEXT:    por %xmm1, %xmm0
3036; SSE2-NEXT:    pand %xmm0, %xmm4
3037; SSE2-NEXT:    pandn %xmm8, %xmm0
3038; SSE2-NEXT:    por %xmm4, %xmm0
3039; SSE2-NEXT:    pxor %xmm3, %xmm2
3040; SSE2-NEXT:    movdqa %xmm2, %xmm1
3041; SSE2-NEXT:    pcmpgtd %xmm5, %xmm1
3042; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
3043; SSE2-NEXT:    pcmpeqd %xmm5, %xmm2
3044; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
3045; SSE2-NEXT:    pand %xmm4, %xmm2
3046; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3047; SSE2-NEXT:    por %xmm2, %xmm1
3048; SSE2-NEXT:    pand %xmm1, %xmm3
3049; SSE2-NEXT:    pandn %xmm8, %xmm1
3050; SSE2-NEXT:    por %xmm3, %xmm1
3051; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0]
3052; SSE2-NEXT:    pand %xmm2, %xmm1
3053; SSE2-NEXT:    pand %xmm2, %xmm0
3054; SSE2-NEXT:    packuswb %xmm1, %xmm0
3055; SSE2-NEXT:    packuswb %xmm0, %xmm0
3056; SSE2-NEXT:    packuswb %xmm0, %xmm0
3057; SSE2-NEXT:    movd %xmm0, (%rdi)
3058; SSE2-NEXT:    retq
3059;
3060; SSSE3-LABEL: trunc_ssat_v4i64_v4i8_store:
3061; SSSE3:       # %bb.0:
3062; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
3063; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
3064; SSSE3-NEXT:    movdqa %xmm1, %xmm3
3065; SSSE3-NEXT:    pxor %xmm2, %xmm3
3066; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [2147483775,2147483775]
3067; SSSE3-NEXT:    movdqa %xmm5, %xmm6
3068; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm6
3069; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
3070; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm3
3071; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
3072; SSSE3-NEXT:    pand %xmm7, %xmm4
3073; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
3074; SSSE3-NEXT:    por %xmm4, %xmm3
3075; SSSE3-NEXT:    pand %xmm3, %xmm1
3076; SSSE3-NEXT:    pandn %xmm8, %xmm3
3077; SSSE3-NEXT:    por %xmm1, %xmm3
3078; SSSE3-NEXT:    movdqa %xmm0, %xmm1
3079; SSSE3-NEXT:    pxor %xmm2, %xmm1
3080; SSSE3-NEXT:    movdqa %xmm5, %xmm4
3081; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm4
3082; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
3083; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm1
3084; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3085; SSSE3-NEXT:    pand %xmm6, %xmm1
3086; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3087; SSSE3-NEXT:    por %xmm1, %xmm4
3088; SSSE3-NEXT:    pand %xmm4, %xmm0
3089; SSSE3-NEXT:    pandn %xmm8, %xmm4
3090; SSSE3-NEXT:    por %xmm0, %xmm4
3091; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
3092; SSSE3-NEXT:    movdqa %xmm4, %xmm1
3093; SSSE3-NEXT:    pxor %xmm2, %xmm1
3094; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [18446744071562067840,18446744071562067840]
3095; SSSE3-NEXT:    movdqa %xmm1, %xmm6
3096; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm6
3097; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
3098; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm1
3099; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
3100; SSSE3-NEXT:    pand %xmm7, %xmm0
3101; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
3102; SSSE3-NEXT:    por %xmm0, %xmm1
3103; SSSE3-NEXT:    pand %xmm1, %xmm4
3104; SSSE3-NEXT:    pandn %xmm8, %xmm1
3105; SSSE3-NEXT:    por %xmm4, %xmm1
3106; SSSE3-NEXT:    pxor %xmm3, %xmm2
3107; SSSE3-NEXT:    movdqa %xmm2, %xmm0
3108; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm0
3109; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
3110; SSSE3-NEXT:    pcmpeqd %xmm5, %xmm2
3111; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
3112; SSSE3-NEXT:    pand %xmm4, %xmm2
3113; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3114; SSSE3-NEXT:    por %xmm2, %xmm0
3115; SSSE3-NEXT:    pand %xmm0, %xmm3
3116; SSSE3-NEXT:    pandn %xmm8, %xmm0
3117; SSSE3-NEXT:    por %xmm3, %xmm0
3118; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
3119; SSSE3-NEXT:    pshufb %xmm2, %xmm0
3120; SSSE3-NEXT:    pshufb %xmm2, %xmm1
3121; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3122; SSSE3-NEXT:    movd %xmm1, (%rdi)
3123; SSSE3-NEXT:    retq
3124;
3125; SSE41-LABEL: trunc_ssat_v4i64_v4i8_store:
3126; SSE41:       # %bb.0:
3127; SSE41-NEXT:    movdqa %xmm0, %xmm2
3128; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [127,127]
3129; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
3130; SSE41-NEXT:    movdqa %xmm1, %xmm0
3131; SSE41-NEXT:    pxor %xmm3, %xmm0
3132; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [2147483775,2147483775]
3133; SSE41-NEXT:    movdqa %xmm6, %xmm5
3134; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
3135; SSE41-NEXT:    movdqa %xmm6, %xmm7
3136; SSE41-NEXT:    pcmpgtd %xmm0, %xmm7
3137; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
3138; SSE41-NEXT:    pand %xmm5, %xmm0
3139; SSE41-NEXT:    por %xmm7, %xmm0
3140; SSE41-NEXT:    movapd %xmm4, %xmm5
3141; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm5
3142; SSE41-NEXT:    movdqa %xmm2, %xmm0
3143; SSE41-NEXT:    pxor %xmm3, %xmm0
3144; SSE41-NEXT:    movdqa %xmm6, %xmm1
3145; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
3146; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
3147; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
3148; SSE41-NEXT:    pand %xmm1, %xmm0
3149; SSE41-NEXT:    por %xmm6, %xmm0
3150; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm4
3151; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
3152; SSE41-NEXT:    movapd %xmm4, %xmm2
3153; SSE41-NEXT:    xorpd %xmm3, %xmm2
3154; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840]
3155; SSE41-NEXT:    movapd %xmm2, %xmm7
3156; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
3157; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
3158; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
3159; SSE41-NEXT:    pand %xmm7, %xmm0
3160; SSE41-NEXT:    por %xmm2, %xmm0
3161; SSE41-NEXT:    movapd %xmm1, %xmm2
3162; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
3163; SSE41-NEXT:    xorpd %xmm5, %xmm3
3164; SSE41-NEXT:    movapd %xmm3, %xmm4
3165; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
3166; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
3167; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
3168; SSE41-NEXT:    pand %xmm4, %xmm0
3169; SSE41-NEXT:    por %xmm3, %xmm0
3170; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm1
3171; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
3172; SSE41-NEXT:    pshufb %xmm0, %xmm1
3173; SSE41-NEXT:    pshufb %xmm0, %xmm2
3174; SSE41-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
3175; SSE41-NEXT:    movd %xmm2, (%rdi)
3176; SSE41-NEXT:    retq
3177;
3178; AVX1-LABEL: trunc_ssat_v4i64_v4i8_store:
3179; AVX1:       # %bb.0:
3180; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
3181; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127]
3182; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm3
3183; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm4
3184; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm2, %xmm0
3185; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488]
3186; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
3187; AVX1-NEXT:    vblendvpd %xmm3, %xmm1, %xmm2, %xmm1
3188; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm2
3189; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm4, %xmm1
3190; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
3191; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
3192; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
3193; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
3194; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3195; AVX1-NEXT:    vmovd %xmm0, (%rdi)
3196; AVX1-NEXT:    vzeroupper
3197; AVX1-NEXT:    retq
3198;
3199; AVX2-LABEL: trunc_ssat_v4i64_v4i8_store:
3200; AVX2:       # %bb.0:
3201; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [127,127,127,127]
3202; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
3203; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
3204; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
3205; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
3206; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
3207; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
3208; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
3209; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
3210; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
3211; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3212; AVX2-NEXT:    vmovd %xmm0, (%rdi)
3213; AVX2-NEXT:    vzeroupper
3214; AVX2-NEXT:    retq
3215;
3216; AVX512F-LABEL: trunc_ssat_v4i64_v4i8_store:
3217; AVX512F:       # %bb.0:
3218; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3219; AVX512F-NEXT:    vpmovsqb %zmm0, %xmm0
3220; AVX512F-NEXT:    vmovd %xmm0, (%rdi)
3221; AVX512F-NEXT:    vzeroupper
3222; AVX512F-NEXT:    retq
3223;
3224; AVX512VL-LABEL: trunc_ssat_v4i64_v4i8_store:
3225; AVX512VL:       # %bb.0:
3226; AVX512VL-NEXT:    vpmovsqb %ymm0, (%rdi)
3227; AVX512VL-NEXT:    vzeroupper
3228; AVX512VL-NEXT:    retq
3229;
3230; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8_store:
3231; AVX512BW:       # %bb.0:
3232; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3233; AVX512BW-NEXT:    vpmovsqb %zmm0, %xmm0
3234; AVX512BW-NEXT:    vmovd %xmm0, (%rdi)
3235; AVX512BW-NEXT:    vzeroupper
3236; AVX512BW-NEXT:    retq
3237;
3238; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i8_store:
3239; AVX512BWVL:       # %bb.0:
3240; AVX512BWVL-NEXT:    vpmovsqb %ymm0, (%rdi)
3241; AVX512BWVL-NEXT:    vzeroupper
3242; AVX512BWVL-NEXT:    retq
3243;
3244; SKX-LABEL: trunc_ssat_v4i64_v4i8_store:
3245; SKX:       # %bb.0:
3246; SKX-NEXT:    vpmovsqb %ymm0, (%rdi)
3247; SKX-NEXT:    vzeroupper
3248; SKX-NEXT:    retq
3249  %1 = icmp slt <4 x i64> %a0, <i64 127, i64 127, i64 127, i64 127>
3250  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 127, i64 127, i64 127, i64 127>
3251  %3 = icmp sgt <4 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128>
3252  %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>
3253  %5 = trunc <4 x i64> %4 to <4 x i8>
3254  store <4 x i8> %5, <4 x i8> *%p1
3255  ret void
3256}
3257
3258define <8 x i8> @trunc_ssat_v8i64_v8i8(<8 x i64>* %p0) "min-legal-vector-width"="256" {
3259; SSE2-LABEL: trunc_ssat_v8i64_v8i8:
3260; SSE2:       # %bb.0:
3261; SSE2-NEXT:    movdqa (%rdi), %xmm6
3262; SSE2-NEXT:    movdqa 16(%rdi), %xmm9
3263; SSE2-NEXT:    movdqa 32(%rdi), %xmm3
3264; SSE2-NEXT:    movdqa 48(%rdi), %xmm5
3265; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
3266; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
3267; SSE2-NEXT:    movdqa %xmm3, %xmm2
3268; SSE2-NEXT:    pxor %xmm1, %xmm2
3269; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [2147483775,2147483775]
3270; SSE2-NEXT:    movdqa %xmm10, %xmm7
3271; SSE2-NEXT:    pcmpgtd %xmm2, %xmm7
3272; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
3273; SSE2-NEXT:    pcmpeqd %xmm10, %xmm2
3274; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
3275; SSE2-NEXT:    pand %xmm0, %xmm4
3276; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
3277; SSE2-NEXT:    por %xmm4, %xmm2
3278; SSE2-NEXT:    pand %xmm2, %xmm3
3279; SSE2-NEXT:    pandn %xmm8, %xmm2
3280; SSE2-NEXT:    por %xmm3, %xmm2
3281; SSE2-NEXT:    movdqa %xmm5, %xmm0
3282; SSE2-NEXT:    pxor %xmm1, %xmm0
3283; SSE2-NEXT:    movdqa %xmm10, %xmm3
3284; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
3285; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
3286; SSE2-NEXT:    pcmpeqd %xmm10, %xmm0
3287; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3288; SSE2-NEXT:    pand %xmm4, %xmm0
3289; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3290; SSE2-NEXT:    por %xmm0, %xmm3
3291; SSE2-NEXT:    pand %xmm3, %xmm5
3292; SSE2-NEXT:    pandn %xmm8, %xmm3
3293; SSE2-NEXT:    por %xmm5, %xmm3
3294; SSE2-NEXT:    movdqa %xmm6, %xmm0
3295; SSE2-NEXT:    pxor %xmm1, %xmm0
3296; SSE2-NEXT:    movdqa %xmm10, %xmm4
3297; SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
3298; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
3299; SSE2-NEXT:    pcmpeqd %xmm10, %xmm0
3300; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3301; SSE2-NEXT:    pand %xmm5, %xmm0
3302; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
3303; SSE2-NEXT:    por %xmm0, %xmm5
3304; SSE2-NEXT:    pand %xmm5, %xmm6
3305; SSE2-NEXT:    pandn %xmm8, %xmm5
3306; SSE2-NEXT:    por %xmm6, %xmm5
3307; SSE2-NEXT:    movdqa %xmm9, %xmm0
3308; SSE2-NEXT:    pxor %xmm1, %xmm0
3309; SSE2-NEXT:    movdqa %xmm10, %xmm4
3310; SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
3311; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
3312; SSE2-NEXT:    pcmpeqd %xmm10, %xmm0
3313; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3314; SSE2-NEXT:    pand %xmm6, %xmm0
3315; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3]
3316; SSE2-NEXT:    por %xmm0, %xmm7
3317; SSE2-NEXT:    pand %xmm7, %xmm9
3318; SSE2-NEXT:    pandn %xmm8, %xmm7
3319; SSE2-NEXT:    por %xmm9, %xmm7
3320; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
3321; SSE2-NEXT:    movdqa %xmm7, %xmm0
3322; SSE2-NEXT:    pxor %xmm1, %xmm0
3323; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840]
3324; SSE2-NEXT:    movdqa %xmm0, %xmm4
3325; SSE2-NEXT:    pcmpgtd %xmm9, %xmm4
3326; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
3327; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
3328; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3329; SSE2-NEXT:    pand %xmm6, %xmm0
3330; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3331; SSE2-NEXT:    por %xmm0, %xmm4
3332; SSE2-NEXT:    pand %xmm4, %xmm7
3333; SSE2-NEXT:    pandn %xmm8, %xmm4
3334; SSE2-NEXT:    por %xmm7, %xmm4
3335; SSE2-NEXT:    movdqa %xmm5, %xmm0
3336; SSE2-NEXT:    pxor %xmm1, %xmm0
3337; SSE2-NEXT:    movdqa %xmm0, %xmm6
3338; SSE2-NEXT:    pcmpgtd %xmm9, %xmm6
3339; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
3340; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
3341; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
3342; SSE2-NEXT:    pand %xmm10, %xmm7
3343; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
3344; SSE2-NEXT:    por %xmm7, %xmm0
3345; SSE2-NEXT:    pand %xmm0, %xmm5
3346; SSE2-NEXT:    pandn %xmm8, %xmm0
3347; SSE2-NEXT:    por %xmm5, %xmm0
3348; SSE2-NEXT:    packssdw %xmm4, %xmm0
3349; SSE2-NEXT:    movdqa %xmm3, %xmm4
3350; SSE2-NEXT:    pxor %xmm1, %xmm4
3351; SSE2-NEXT:    movdqa %xmm4, %xmm5
3352; SSE2-NEXT:    pcmpgtd %xmm9, %xmm5
3353; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
3354; SSE2-NEXT:    pcmpeqd %xmm9, %xmm4
3355; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3356; SSE2-NEXT:    pand %xmm6, %xmm4
3357; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
3358; SSE2-NEXT:    por %xmm4, %xmm5
3359; SSE2-NEXT:    pand %xmm5, %xmm3
3360; SSE2-NEXT:    pandn %xmm8, %xmm5
3361; SSE2-NEXT:    por %xmm3, %xmm5
3362; SSE2-NEXT:    pxor %xmm2, %xmm1
3363; SSE2-NEXT:    movdqa %xmm1, %xmm3
3364; SSE2-NEXT:    pcmpgtd %xmm9, %xmm3
3365; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
3366; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
3367; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3368; SSE2-NEXT:    pand %xmm4, %xmm1
3369; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3370; SSE2-NEXT:    por %xmm1, %xmm3
3371; SSE2-NEXT:    pand %xmm3, %xmm2
3372; SSE2-NEXT:    pandn %xmm8, %xmm3
3373; SSE2-NEXT:    por %xmm2, %xmm3
3374; SSE2-NEXT:    packssdw %xmm5, %xmm3
3375; SSE2-NEXT:    packssdw %xmm3, %xmm0
3376; SSE2-NEXT:    packsswb %xmm0, %xmm0
3377; SSE2-NEXT:    retq
3378;
3379; SSSE3-LABEL: trunc_ssat_v8i64_v8i8:
3380; SSSE3:       # %bb.0:
3381; SSSE3-NEXT:    movdqa (%rdi), %xmm6
3382; SSSE3-NEXT:    movdqa 16(%rdi), %xmm9
3383; SSSE3-NEXT:    movdqa 32(%rdi), %xmm3
3384; SSSE3-NEXT:    movdqa 48(%rdi), %xmm5
3385; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
3386; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
3387; SSSE3-NEXT:    movdqa %xmm3, %xmm2
3388; SSSE3-NEXT:    pxor %xmm1, %xmm2
3389; SSSE3-NEXT:    movdqa {{.*#+}} xmm10 = [2147483775,2147483775]
3390; SSSE3-NEXT:    movdqa %xmm10, %xmm7
3391; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm7
3392; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
3393; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm2
3394; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
3395; SSSE3-NEXT:    pand %xmm0, %xmm4
3396; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
3397; SSSE3-NEXT:    por %xmm4, %xmm2
3398; SSSE3-NEXT:    pand %xmm2, %xmm3
3399; SSSE3-NEXT:    pandn %xmm8, %xmm2
3400; SSSE3-NEXT:    por %xmm3, %xmm2
3401; SSSE3-NEXT:    movdqa %xmm5, %xmm0
3402; SSSE3-NEXT:    pxor %xmm1, %xmm0
3403; SSSE3-NEXT:    movdqa %xmm10, %xmm3
3404; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm3
3405; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
3406; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm0
3407; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3408; SSSE3-NEXT:    pand %xmm4, %xmm0
3409; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3410; SSSE3-NEXT:    por %xmm0, %xmm3
3411; SSSE3-NEXT:    pand %xmm3, %xmm5
3412; SSSE3-NEXT:    pandn %xmm8, %xmm3
3413; SSSE3-NEXT:    por %xmm5, %xmm3
3414; SSSE3-NEXT:    movdqa %xmm6, %xmm0
3415; SSSE3-NEXT:    pxor %xmm1, %xmm0
3416; SSSE3-NEXT:    movdqa %xmm10, %xmm4
3417; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm4
3418; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
3419; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm0
3420; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3421; SSSE3-NEXT:    pand %xmm5, %xmm0
3422; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
3423; SSSE3-NEXT:    por %xmm0, %xmm5
3424; SSSE3-NEXT:    pand %xmm5, %xmm6
3425; SSSE3-NEXT:    pandn %xmm8, %xmm5
3426; SSSE3-NEXT:    por %xmm6, %xmm5
3427; SSSE3-NEXT:    movdqa %xmm9, %xmm0
3428; SSSE3-NEXT:    pxor %xmm1, %xmm0
3429; SSSE3-NEXT:    movdqa %xmm10, %xmm4
3430; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm4
3431; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
3432; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm0
3433; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3434; SSSE3-NEXT:    pand %xmm6, %xmm0
3435; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3]
3436; SSSE3-NEXT:    por %xmm0, %xmm7
3437; SSSE3-NEXT:    pand %xmm7, %xmm9
3438; SSSE3-NEXT:    pandn %xmm8, %xmm7
3439; SSSE3-NEXT:    por %xmm9, %xmm7
3440; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
3441; SSSE3-NEXT:    movdqa %xmm7, %xmm0
3442; SSSE3-NEXT:    pxor %xmm1, %xmm0
3443; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840]
3444; SSSE3-NEXT:    movdqa %xmm0, %xmm4
3445; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm4
3446; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
3447; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
3448; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3449; SSSE3-NEXT:    pand %xmm6, %xmm0
3450; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3451; SSSE3-NEXT:    por %xmm0, %xmm4
3452; SSSE3-NEXT:    pand %xmm4, %xmm7
3453; SSSE3-NEXT:    pandn %xmm8, %xmm4
3454; SSSE3-NEXT:    por %xmm7, %xmm4
3455; SSSE3-NEXT:    movdqa %xmm5, %xmm0
3456; SSSE3-NEXT:    pxor %xmm1, %xmm0
3457; SSSE3-NEXT:    movdqa %xmm0, %xmm6
3458; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm6
3459; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
3460; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
3461; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
3462; SSSE3-NEXT:    pand %xmm10, %xmm7
3463; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
3464; SSSE3-NEXT:    por %xmm7, %xmm0
3465; SSSE3-NEXT:    pand %xmm0, %xmm5
3466; SSSE3-NEXT:    pandn %xmm8, %xmm0
3467; SSSE3-NEXT:    por %xmm5, %xmm0
3468; SSSE3-NEXT:    packssdw %xmm4, %xmm0
3469; SSSE3-NEXT:    movdqa %xmm3, %xmm4
3470; SSSE3-NEXT:    pxor %xmm1, %xmm4
3471; SSSE3-NEXT:    movdqa %xmm4, %xmm5
3472; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm5
3473; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
3474; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm4
3475; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3476; SSSE3-NEXT:    pand %xmm6, %xmm4
3477; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
3478; SSSE3-NEXT:    por %xmm4, %xmm5
3479; SSSE3-NEXT:    pand %xmm5, %xmm3
3480; SSSE3-NEXT:    pandn %xmm8, %xmm5
3481; SSSE3-NEXT:    por %xmm3, %xmm5
3482; SSSE3-NEXT:    pxor %xmm2, %xmm1
3483; SSSE3-NEXT:    movdqa %xmm1, %xmm3
3484; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm3
3485; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
3486; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
3487; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3488; SSSE3-NEXT:    pand %xmm4, %xmm1
3489; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3490; SSSE3-NEXT:    por %xmm1, %xmm3
3491; SSSE3-NEXT:    pand %xmm3, %xmm2
3492; SSSE3-NEXT:    pandn %xmm8, %xmm3
3493; SSSE3-NEXT:    por %xmm2, %xmm3
3494; SSSE3-NEXT:    packssdw %xmm5, %xmm3
3495; SSSE3-NEXT:    packssdw %xmm3, %xmm0
3496; SSSE3-NEXT:    packsswb %xmm0, %xmm0
3497; SSSE3-NEXT:    retq
3498;
3499; SSE41-LABEL: trunc_ssat_v8i64_v8i8:
3500; SSE41:       # %bb.0:
3501; SSE41-NEXT:    movdqa (%rdi), %xmm10
3502; SSE41-NEXT:    movdqa 16(%rdi), %xmm9
3503; SSE41-NEXT:    movdqa 32(%rdi), %xmm3
3504; SSE41-NEXT:    movdqa 48(%rdi), %xmm5
3505; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [127,127]
3506; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
3507; SSE41-NEXT:    movdqa %xmm3, %xmm0
3508; SSE41-NEXT:    pxor %xmm2, %xmm0
3509; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [2147483775,2147483775]
3510; SSE41-NEXT:    movdqa %xmm4, %xmm7
3511; SSE41-NEXT:    pcmpeqd %xmm0, %xmm7
3512; SSE41-NEXT:    movdqa %xmm4, %xmm6
3513; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
3514; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
3515; SSE41-NEXT:    pand %xmm7, %xmm0
3516; SSE41-NEXT:    por %xmm6, %xmm0
3517; SSE41-NEXT:    movapd %xmm1, %xmm8
3518; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm8
3519; SSE41-NEXT:    movdqa %xmm5, %xmm0
3520; SSE41-NEXT:    pxor %xmm2, %xmm0
3521; SSE41-NEXT:    movdqa %xmm4, %xmm3
3522; SSE41-NEXT:    pcmpeqd %xmm0, %xmm3
3523; SSE41-NEXT:    movdqa %xmm4, %xmm6
3524; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
3525; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
3526; SSE41-NEXT:    pand %xmm3, %xmm0
3527; SSE41-NEXT:    por %xmm6, %xmm0
3528; SSE41-NEXT:    movapd %xmm1, %xmm11
3529; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm11
3530; SSE41-NEXT:    movdqa %xmm10, %xmm0
3531; SSE41-NEXT:    pxor %xmm2, %xmm0
3532; SSE41-NEXT:    movdqa %xmm4, %xmm3
3533; SSE41-NEXT:    pcmpeqd %xmm0, %xmm3
3534; SSE41-NEXT:    movdqa %xmm4, %xmm5
3535; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
3536; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
3537; SSE41-NEXT:    pand %xmm3, %xmm0
3538; SSE41-NEXT:    por %xmm5, %xmm0
3539; SSE41-NEXT:    movapd %xmm1, %xmm3
3540; SSE41-NEXT:    blendvpd %xmm0, %xmm10, %xmm3
3541; SSE41-NEXT:    movdqa %xmm9, %xmm0
3542; SSE41-NEXT:    pxor %xmm2, %xmm0
3543; SSE41-NEXT:    movdqa %xmm4, %xmm5
3544; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
3545; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
3546; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
3547; SSE41-NEXT:    pand %xmm5, %xmm0
3548; SSE41-NEXT:    por %xmm4, %xmm0
3549; SSE41-NEXT:    blendvpd %xmm0, %xmm9, %xmm1
3550; SSE41-NEXT:    movapd {{.*#+}} xmm5 = [18446744073709551488,18446744073709551488]
3551; SSE41-NEXT:    movapd %xmm1, %xmm4
3552; SSE41-NEXT:    xorpd %xmm2, %xmm4
3553; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840]
3554; SSE41-NEXT:    movapd %xmm4, %xmm7
3555; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
3556; SSE41-NEXT:    pcmpgtd %xmm6, %xmm4
3557; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
3558; SSE41-NEXT:    pand %xmm7, %xmm0
3559; SSE41-NEXT:    por %xmm4, %xmm0
3560; SSE41-NEXT:    movapd %xmm5, %xmm4
3561; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm4
3562; SSE41-NEXT:    movapd %xmm3, %xmm1
3563; SSE41-NEXT:    xorpd %xmm2, %xmm1
3564; SSE41-NEXT:    movapd %xmm1, %xmm7
3565; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
3566; SSE41-NEXT:    pcmpgtd %xmm6, %xmm1
3567; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
3568; SSE41-NEXT:    pand %xmm7, %xmm0
3569; SSE41-NEXT:    por %xmm1, %xmm0
3570; SSE41-NEXT:    movapd %xmm5, %xmm1
3571; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
3572; SSE41-NEXT:    packssdw %xmm4, %xmm1
3573; SSE41-NEXT:    movapd %xmm11, %xmm3
3574; SSE41-NEXT:    xorpd %xmm2, %xmm3
3575; SSE41-NEXT:    movapd %xmm3, %xmm4
3576; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
3577; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
3578; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
3579; SSE41-NEXT:    pand %xmm4, %xmm0
3580; SSE41-NEXT:    por %xmm3, %xmm0
3581; SSE41-NEXT:    movapd %xmm5, %xmm3
3582; SSE41-NEXT:    blendvpd %xmm0, %xmm11, %xmm3
3583; SSE41-NEXT:    xorpd %xmm8, %xmm2
3584; SSE41-NEXT:    movapd %xmm2, %xmm4
3585; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
3586; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
3587; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
3588; SSE41-NEXT:    pand %xmm4, %xmm0
3589; SSE41-NEXT:    por %xmm2, %xmm0
3590; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm5
3591; SSE41-NEXT:    packssdw %xmm3, %xmm5
3592; SSE41-NEXT:    packssdw %xmm5, %xmm1
3593; SSE41-NEXT:    packsswb %xmm1, %xmm1
3594; SSE41-NEXT:    movdqa %xmm1, %xmm0
3595; SSE41-NEXT:    retq
3596;
3597; AVX1-LABEL: trunc_ssat_v8i64_v8i8:
3598; AVX1:       # %bb.0:
3599; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
3600; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
3601; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm2
3602; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm3
3603; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [127,127]
3604; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm8
3605; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm6
3606; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm7
3607; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm4, %xmm5
3608; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
3609; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [18446744073709551488,18446744073709551488]
3610; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm0, %xmm9
3611; AVX1-NEXT:    vblendvpd %xmm7, %xmm1, %xmm4, %xmm1
3612; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm1, %xmm7
3613; AVX1-NEXT:    vblendvpd %xmm6, %xmm2, %xmm4, %xmm2
3614; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm2, %xmm6
3615; AVX1-NEXT:    vblendvpd %xmm8, %xmm3, %xmm4, %xmm3
3616; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm3, %xmm4
3617; AVX1-NEXT:    vblendvpd %xmm4, %xmm3, %xmm5, %xmm3
3618; AVX1-NEXT:    vblendvpd %xmm6, %xmm2, %xmm5, %xmm2
3619; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
3620; AVX1-NEXT:    vblendvpd %xmm7, %xmm1, %xmm5, %xmm1
3621; AVX1-NEXT:    vblendvpd %xmm9, %xmm0, %xmm5, %xmm0
3622; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
3623; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
3624; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
3625; AVX1-NEXT:    retq
3626;
3627; AVX2-LABEL: trunc_ssat_v8i64_v8i8:
3628; AVX2:       # %bb.0:
3629; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
3630; AVX2-NEXT:    vmovdqa 32(%rdi), %ymm1
3631; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [127,127,127,127]
3632; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
3633; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
3634; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
3635; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
3636; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
3637; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
3638; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
3639; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
3640; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
3641; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
3642; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3643; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
3644; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
3645; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
3646; AVX2-NEXT:    vzeroupper
3647; AVX2-NEXT:    retq
3648;
3649; AVX512-LABEL: trunc_ssat_v8i64_v8i8:
3650; AVX512:       # %bb.0:
3651; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
3652; AVX512-NEXT:    vpmovsqb %zmm0, %xmm0
3653; AVX512-NEXT:    vzeroupper
3654; AVX512-NEXT:    retq
3655;
3656; SKX-LABEL: trunc_ssat_v8i64_v8i8:
3657; SKX:       # %bb.0:
3658; SKX-NEXT:    vmovdqa (%rdi), %ymm0
3659; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
3660; SKX-NEXT:    vpmovsqb %ymm1, %xmm1
3661; SKX-NEXT:    vpmovsqb %ymm0, %xmm0
3662; SKX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3663; SKX-NEXT:    vzeroupper
3664; SKX-NEXT:    retq
3665  %a0 = load <8 x i64>, <8 x i64>* %p0
3666  %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
3667  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
3668  %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
3669  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
3670  %5 = trunc <8 x i64> %4 to <8 x i8>
3671  ret <8 x i8> %5
3672}
3673
3674; TODO: The AVX1 codegen shows a missed opportunity to narrow the blendv+logic sequence to 128 bits.
3675
3676define void @trunc_ssat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-legal-vector-width"="256" {
3677; SSE2-LABEL: trunc_ssat_v8i64_v8i8_store:
3678; SSE2:       # %bb.0:
3679; SSE2-NEXT:    movdqa (%rdi), %xmm6
3680; SSE2-NEXT:    movdqa 16(%rdi), %xmm9
3681; SSE2-NEXT:    movdqa 32(%rdi), %xmm2
3682; SSE2-NEXT:    movdqa 48(%rdi), %xmm5
3683; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
3684; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
3685; SSE2-NEXT:    movdqa %xmm2, %xmm1
3686; SSE2-NEXT:    pxor %xmm0, %xmm1
3687; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [2147483775,2147483775]
3688; SSE2-NEXT:    movdqa %xmm10, %xmm7
3689; SSE2-NEXT:    pcmpgtd %xmm1, %xmm7
3690; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm7[0,0,2,2]
3691; SSE2-NEXT:    pcmpeqd %xmm10, %xmm1
3692; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
3693; SSE2-NEXT:    pand %xmm3, %xmm4
3694; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
3695; SSE2-NEXT:    por %xmm4, %xmm1
3696; SSE2-NEXT:    pand %xmm1, %xmm2
3697; SSE2-NEXT:    pandn %xmm8, %xmm1
3698; SSE2-NEXT:    por %xmm2, %xmm1
3699; SSE2-NEXT:    movdqa %xmm5, %xmm2
3700; SSE2-NEXT:    pxor %xmm0, %xmm2
3701; SSE2-NEXT:    movdqa %xmm10, %xmm3
3702; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
3703; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
3704; SSE2-NEXT:    pcmpeqd %xmm10, %xmm2
3705; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
3706; SSE2-NEXT:    pand %xmm4, %xmm7
3707; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
3708; SSE2-NEXT:    por %xmm7, %xmm2
3709; SSE2-NEXT:    pand %xmm2, %xmm5
3710; SSE2-NEXT:    pandn %xmm8, %xmm2
3711; SSE2-NEXT:    por %xmm5, %xmm2
3712; SSE2-NEXT:    movdqa %xmm6, %xmm3
3713; SSE2-NEXT:    pxor %xmm0, %xmm3
3714; SSE2-NEXT:    movdqa %xmm10, %xmm4
3715; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
3716; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
3717; SSE2-NEXT:    pcmpeqd %xmm10, %xmm3
3718; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3719; SSE2-NEXT:    pand %xmm5, %xmm3
3720; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
3721; SSE2-NEXT:    por %xmm3, %xmm5
3722; SSE2-NEXT:    pand %xmm5, %xmm6
3723; SSE2-NEXT:    pandn %xmm8, %xmm5
3724; SSE2-NEXT:    por %xmm6, %xmm5
3725; SSE2-NEXT:    movdqa %xmm9, %xmm3
3726; SSE2-NEXT:    pxor %xmm0, %xmm3
3727; SSE2-NEXT:    movdqa %xmm10, %xmm4
3728; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
3729; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
3730; SSE2-NEXT:    pcmpeqd %xmm10, %xmm3
3731; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3732; SSE2-NEXT:    pand %xmm6, %xmm3
3733; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
3734; SSE2-NEXT:    por %xmm3, %xmm6
3735; SSE2-NEXT:    pand %xmm6, %xmm9
3736; SSE2-NEXT:    pandn %xmm8, %xmm6
3737; SSE2-NEXT:    por %xmm9, %xmm6
3738; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
3739; SSE2-NEXT:    movdqa %xmm6, %xmm7
3740; SSE2-NEXT:    pxor %xmm0, %xmm7
3741; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840]
3742; SSE2-NEXT:    movdqa %xmm7, %xmm3
3743; SSE2-NEXT:    pcmpgtd %xmm9, %xmm3
3744; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
3745; SSE2-NEXT:    pcmpeqd %xmm9, %xmm7
3746; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
3747; SSE2-NEXT:    pand %xmm4, %xmm7
3748; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3749; SSE2-NEXT:    por %xmm7, %xmm3
3750; SSE2-NEXT:    pand %xmm3, %xmm6
3751; SSE2-NEXT:    pandn %xmm8, %xmm3
3752; SSE2-NEXT:    por %xmm6, %xmm3
3753; SSE2-NEXT:    movdqa %xmm5, %xmm4
3754; SSE2-NEXT:    pxor %xmm0, %xmm4
3755; SSE2-NEXT:    movdqa %xmm4, %xmm6
3756; SSE2-NEXT:    pcmpgtd %xmm9, %xmm6
3757; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
3758; SSE2-NEXT:    pcmpeqd %xmm9, %xmm4
3759; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3760; SSE2-NEXT:    pand %xmm7, %xmm4
3761; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
3762; SSE2-NEXT:    por %xmm4, %xmm6
3763; SSE2-NEXT:    pand %xmm6, %xmm5
3764; SSE2-NEXT:    pandn %xmm8, %xmm6
3765; SSE2-NEXT:    por %xmm5, %xmm6
3766; SSE2-NEXT:    packssdw %xmm3, %xmm6
3767; SSE2-NEXT:    movdqa %xmm2, %xmm3
3768; SSE2-NEXT:    pxor %xmm0, %xmm3
3769; SSE2-NEXT:    movdqa %xmm3, %xmm4
3770; SSE2-NEXT:    pcmpgtd %xmm9, %xmm4
3771; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
3772; SSE2-NEXT:    pcmpeqd %xmm9, %xmm3
3773; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3774; SSE2-NEXT:    pand %xmm5, %xmm3
3775; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3776; SSE2-NEXT:    por %xmm3, %xmm4
3777; SSE2-NEXT:    pand %xmm4, %xmm2
3778; SSE2-NEXT:    pandn %xmm8, %xmm4
3779; SSE2-NEXT:    por %xmm2, %xmm4
3780; SSE2-NEXT:    pxor %xmm1, %xmm0
3781; SSE2-NEXT:    movdqa %xmm0, %xmm2
3782; SSE2-NEXT:    pcmpgtd %xmm9, %xmm2
3783; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
3784; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
3785; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3786; SSE2-NEXT:    pand %xmm3, %xmm0
3787; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
3788; SSE2-NEXT:    por %xmm0, %xmm2
3789; SSE2-NEXT:    pand %xmm2, %xmm1
3790; SSE2-NEXT:    pandn %xmm8, %xmm2
3791; SSE2-NEXT:    por %xmm1, %xmm2
3792; SSE2-NEXT:    packssdw %xmm4, %xmm2
3793; SSE2-NEXT:    packssdw %xmm2, %xmm6
3794; SSE2-NEXT:    packsswb %xmm6, %xmm6
3795; SSE2-NEXT:    movq %xmm6, (%rsi)
3796; SSE2-NEXT:    retq
3797;
3798; SSSE3-LABEL: trunc_ssat_v8i64_v8i8_store:
3799; SSSE3:       # %bb.0:
3800; SSSE3-NEXT:    movdqa (%rdi), %xmm6
3801; SSSE3-NEXT:    movdqa 16(%rdi), %xmm9
3802; SSSE3-NEXT:    movdqa 32(%rdi), %xmm2
3803; SSSE3-NEXT:    movdqa 48(%rdi), %xmm5
3804; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
3805; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
3806; SSSE3-NEXT:    movdqa %xmm2, %xmm1
3807; SSSE3-NEXT:    pxor %xmm0, %xmm1
3808; SSSE3-NEXT:    movdqa {{.*#+}} xmm10 = [2147483775,2147483775]
3809; SSSE3-NEXT:    movdqa %xmm10, %xmm7
3810; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm7
3811; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm7[0,0,2,2]
3812; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm1
3813; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
3814; SSSE3-NEXT:    pand %xmm3, %xmm4
3815; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
3816; SSSE3-NEXT:    por %xmm4, %xmm1
3817; SSSE3-NEXT:    pand %xmm1, %xmm2
3818; SSSE3-NEXT:    pandn %xmm8, %xmm1
3819; SSSE3-NEXT:    por %xmm2, %xmm1
3820; SSSE3-NEXT:    movdqa %xmm5, %xmm2
3821; SSSE3-NEXT:    pxor %xmm0, %xmm2
3822; SSSE3-NEXT:    movdqa %xmm10, %xmm3
3823; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
3824; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
3825; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm2
3826; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
3827; SSSE3-NEXT:    pand %xmm4, %xmm7
3828; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
3829; SSSE3-NEXT:    por %xmm7, %xmm2
3830; SSSE3-NEXT:    pand %xmm2, %xmm5
3831; SSSE3-NEXT:    pandn %xmm8, %xmm2
3832; SSSE3-NEXT:    por %xmm5, %xmm2
3833; SSSE3-NEXT:    movdqa %xmm6, %xmm3
3834; SSSE3-NEXT:    pxor %xmm0, %xmm3
3835; SSSE3-NEXT:    movdqa %xmm10, %xmm4
3836; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm4
3837; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
3838; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm3
3839; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3840; SSSE3-NEXT:    pand %xmm5, %xmm3
3841; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
3842; SSSE3-NEXT:    por %xmm3, %xmm5
3843; SSSE3-NEXT:    pand %xmm5, %xmm6
3844; SSSE3-NEXT:    pandn %xmm8, %xmm5
3845; SSSE3-NEXT:    por %xmm6, %xmm5
3846; SSSE3-NEXT:    movdqa %xmm9, %xmm3
3847; SSSE3-NEXT:    pxor %xmm0, %xmm3
3848; SSSE3-NEXT:    movdqa %xmm10, %xmm4
3849; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm4
3850; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
3851; SSSE3-NEXT:    pcmpeqd %xmm10, %xmm3
3852; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3853; SSSE3-NEXT:    pand %xmm6, %xmm3
3854; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
3855; SSSE3-NEXT:    por %xmm3, %xmm6
3856; SSSE3-NEXT:    pand %xmm6, %xmm9
3857; SSSE3-NEXT:    pandn %xmm8, %xmm6
3858; SSSE3-NEXT:    por %xmm9, %xmm6
3859; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
3860; SSSE3-NEXT:    movdqa %xmm6, %xmm7
3861; SSSE3-NEXT:    pxor %xmm0, %xmm7
3862; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840]
3863; SSSE3-NEXT:    movdqa %xmm7, %xmm3
3864; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm3
3865; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
3866; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm7
3867; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
3868; SSSE3-NEXT:    pand %xmm4, %xmm7
3869; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3870; SSSE3-NEXT:    por %xmm7, %xmm3
3871; SSSE3-NEXT:    pand %xmm3, %xmm6
3872; SSSE3-NEXT:    pandn %xmm8, %xmm3
3873; SSSE3-NEXT:    por %xmm6, %xmm3
3874; SSSE3-NEXT:    movdqa %xmm5, %xmm4
3875; SSSE3-NEXT:    pxor %xmm0, %xmm4
3876; SSSE3-NEXT:    movdqa %xmm4, %xmm6
3877; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm6
3878; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
3879; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm4
3880; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3881; SSSE3-NEXT:    pand %xmm7, %xmm4
3882; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
3883; SSSE3-NEXT:    por %xmm4, %xmm6
3884; SSSE3-NEXT:    pand %xmm6, %xmm5
3885; SSSE3-NEXT:    pandn %xmm8, %xmm6
3886; SSSE3-NEXT:    por %xmm5, %xmm6
3887; SSSE3-NEXT:    packssdw %xmm3, %xmm6
3888; SSSE3-NEXT:    movdqa %xmm2, %xmm3
3889; SSSE3-NEXT:    pxor %xmm0, %xmm3
3890; SSSE3-NEXT:    movdqa %xmm3, %xmm4
3891; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm4
3892; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
3893; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm3
3894; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3895; SSSE3-NEXT:    pand %xmm5, %xmm3
3896; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3897; SSSE3-NEXT:    por %xmm3, %xmm4
3898; SSSE3-NEXT:    pand %xmm4, %xmm2
3899; SSSE3-NEXT:    pandn %xmm8, %xmm4
3900; SSSE3-NEXT:    por %xmm2, %xmm4
3901; SSSE3-NEXT:    pxor %xmm1, %xmm0
3902; SSSE3-NEXT:    movdqa %xmm0, %xmm2
3903; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm2
3904; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
3905; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
3906; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3907; SSSE3-NEXT:    pand %xmm3, %xmm0
3908; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
3909; SSSE3-NEXT:    por %xmm0, %xmm2
3910; SSSE3-NEXT:    pand %xmm2, %xmm1
3911; SSSE3-NEXT:    pandn %xmm8, %xmm2
3912; SSSE3-NEXT:    por %xmm1, %xmm2
3913; SSSE3-NEXT:    packssdw %xmm4, %xmm2
3914; SSSE3-NEXT:    packssdw %xmm2, %xmm6
3915; SSSE3-NEXT:    packsswb %xmm6, %xmm6
3916; SSSE3-NEXT:    movq %xmm6, (%rsi)
3917; SSSE3-NEXT:    retq
3918;
3919; SSE41-LABEL: trunc_ssat_v8i64_v8i8_store:
3920; SSE41:       # %bb.0:
3921; SSE41-NEXT:    movdqa (%rdi), %xmm10
3922; SSE41-NEXT:    movdqa 16(%rdi), %xmm9
3923; SSE41-NEXT:    movdqa 32(%rdi), %xmm2
3924; SSE41-NEXT:    movdqa 48(%rdi), %xmm5
3925; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [127,127]
3926; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
3927; SSE41-NEXT:    movdqa %xmm2, %xmm0
3928; SSE41-NEXT:    pxor %xmm1, %xmm0
3929; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483775,2147483775]
3930; SSE41-NEXT:    movdqa %xmm3, %xmm7
3931; SSE41-NEXT:    pcmpeqd %xmm0, %xmm7
3932; SSE41-NEXT:    movdqa %xmm3, %xmm6
3933; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
3934; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
3935; SSE41-NEXT:    pand %xmm7, %xmm0
3936; SSE41-NEXT:    por %xmm6, %xmm0
3937; SSE41-NEXT:    movapd %xmm4, %xmm8
3938; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm8
3939; SSE41-NEXT:    movdqa %xmm5, %xmm0
3940; SSE41-NEXT:    pxor %xmm1, %xmm0
3941; SSE41-NEXT:    movdqa %xmm3, %xmm2
3942; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
3943; SSE41-NEXT:    movdqa %xmm3, %xmm6
3944; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
3945; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
3946; SSE41-NEXT:    pand %xmm2, %xmm0
3947; SSE41-NEXT:    por %xmm6, %xmm0
3948; SSE41-NEXT:    movapd %xmm4, %xmm11
3949; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm11
3950; SSE41-NEXT:    movdqa %xmm10, %xmm0
3951; SSE41-NEXT:    pxor %xmm1, %xmm0
3952; SSE41-NEXT:    movdqa %xmm3, %xmm2
3953; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
3954; SSE41-NEXT:    movdqa %xmm3, %xmm5
3955; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
3956; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
3957; SSE41-NEXT:    pand %xmm2, %xmm0
3958; SSE41-NEXT:    por %xmm5, %xmm0
3959; SSE41-NEXT:    movapd %xmm4, %xmm2
3960; SSE41-NEXT:    blendvpd %xmm0, %xmm10, %xmm2
3961; SSE41-NEXT:    movdqa %xmm9, %xmm0
3962; SSE41-NEXT:    pxor %xmm1, %xmm0
3963; SSE41-NEXT:    movdqa %xmm3, %xmm5
3964; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
3965; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
3966; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
3967; SSE41-NEXT:    pand %xmm5, %xmm0
3968; SSE41-NEXT:    por %xmm3, %xmm0
3969; SSE41-NEXT:    blendvpd %xmm0, %xmm9, %xmm4
3970; SSE41-NEXT:    movapd {{.*#+}} xmm5 = [18446744073709551488,18446744073709551488]
3971; SSE41-NEXT:    movapd %xmm4, %xmm3
3972; SSE41-NEXT:    xorpd %xmm1, %xmm3
3973; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840]
3974; SSE41-NEXT:    movapd %xmm3, %xmm7
3975; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
3976; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
3977; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
3978; SSE41-NEXT:    pand %xmm7, %xmm0
3979; SSE41-NEXT:    por %xmm3, %xmm0
3980; SSE41-NEXT:    movapd %xmm5, %xmm3
3981; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm3
3982; SSE41-NEXT:    movapd %xmm2, %xmm4
3983; SSE41-NEXT:    xorpd %xmm1, %xmm4
3984; SSE41-NEXT:    movapd %xmm4, %xmm7
3985; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
3986; SSE41-NEXT:    pcmpgtd %xmm6, %xmm4
3987; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
3988; SSE41-NEXT:    pand %xmm7, %xmm0
3989; SSE41-NEXT:    por %xmm4, %xmm0
3990; SSE41-NEXT:    movapd %xmm5, %xmm4
3991; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm4
3992; SSE41-NEXT:    packssdw %xmm3, %xmm4
3993; SSE41-NEXT:    movapd %xmm11, %xmm2
3994; SSE41-NEXT:    xorpd %xmm1, %xmm2
3995; SSE41-NEXT:    movapd %xmm2, %xmm3
3996; SSE41-NEXT:    pcmpeqd %xmm6, %xmm3
3997; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
3998; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
3999; SSE41-NEXT:    pand %xmm3, %xmm0
4000; SSE41-NEXT:    por %xmm2, %xmm0
4001; SSE41-NEXT:    movapd %xmm5, %xmm2
4002; SSE41-NEXT:    blendvpd %xmm0, %xmm11, %xmm2
4003; SSE41-NEXT:    xorpd %xmm8, %xmm1
4004; SSE41-NEXT:    movapd %xmm1, %xmm3
4005; SSE41-NEXT:    pcmpeqd %xmm6, %xmm3
4006; SSE41-NEXT:    pcmpgtd %xmm6, %xmm1
4007; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
4008; SSE41-NEXT:    pand %xmm3, %xmm0
4009; SSE41-NEXT:    por %xmm1, %xmm0
4010; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm5
4011; SSE41-NEXT:    packssdw %xmm2, %xmm5
4012; SSE41-NEXT:    packssdw %xmm5, %xmm4
4013; SSE41-NEXT:    packsswb %xmm4, %xmm4
4014; SSE41-NEXT:    movq %xmm4, (%rsi)
4015; SSE41-NEXT:    retq
4016;
4017; AVX1-LABEL: trunc_ssat_v8i64_v8i8_store:
4018; AVX1:       # %bb.0:
4019; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
4020; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
4021; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm2
4022; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm3
4023; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [127,127]
4024; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm8
4025; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm6
4026; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm7
4027; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm4, %xmm5
4028; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
4029; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [18446744073709551488,18446744073709551488]
4030; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm0, %xmm9
4031; AVX1-NEXT:    vblendvpd %xmm7, %xmm1, %xmm4, %xmm1
4032; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm1, %xmm7
4033; AVX1-NEXT:    vblendvpd %xmm6, %xmm2, %xmm4, %xmm2
4034; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm2, %xmm6
4035; AVX1-NEXT:    vblendvpd %xmm8, %xmm3, %xmm4, %xmm3
4036; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm3, %xmm4
4037; AVX1-NEXT:    vblendvpd %xmm4, %xmm3, %xmm5, %xmm3
4038; AVX1-NEXT:    vblendvpd %xmm6, %xmm2, %xmm5, %xmm2
4039; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
4040; AVX1-NEXT:    vblendvpd %xmm7, %xmm1, %xmm5, %xmm1
4041; AVX1-NEXT:    vblendvpd %xmm9, %xmm0, %xmm5, %xmm0
4042; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
4043; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
4044; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
4045; AVX1-NEXT:    vmovq %xmm0, (%rsi)
4046; AVX1-NEXT:    retq
4047;
4048; AVX2-LABEL: trunc_ssat_v8i64_v8i8_store:
4049; AVX2:       # %bb.0:
4050; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
4051; AVX2-NEXT:    vmovdqa 32(%rdi), %ymm1
4052; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [127,127,127,127]
4053; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
4054; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
4055; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
4056; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
4057; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
4058; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
4059; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
4060; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
4061; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
4062; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
4063; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4064; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
4065; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
4066; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
4067; AVX2-NEXT:    vmovq %xmm0, (%rsi)
4068; AVX2-NEXT:    vzeroupper
4069; AVX2-NEXT:    retq
4070;
4071; AVX512-LABEL: trunc_ssat_v8i64_v8i8_store:
4072; AVX512:       # %bb.0:
4073; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
4074; AVX512-NEXT:    vpmovsqb %zmm0, (%rsi)
4075; AVX512-NEXT:    vzeroupper
4076; AVX512-NEXT:    retq
4077;
4078; SKX-LABEL: trunc_ssat_v8i64_v8i8_store:
4079; SKX:       # %bb.0:
4080; SKX-NEXT:    vmovdqa (%rdi), %ymm0
4081; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
4082; SKX-NEXT:    vpmovsqb %ymm1, %xmm1
4083; SKX-NEXT:    vpmovsqb %ymm0, %xmm0
4084; SKX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
4085; SKX-NEXT:    vmovq %xmm0, (%rsi)
4086; SKX-NEXT:    vzeroupper
4087; SKX-NEXT:    retq
4088  %a0 = load <8 x i64>, <8 x i64>* %p0
4089  %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
4090  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
4091  %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
4092  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
4093  %5 = trunc <8 x i64> %4 to <8 x i8>
4094  store <8 x i8> %5, <8 x i8> *%p1
4095  ret void
4096}
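; Illustrative note: every trunc_ssat test in this file is built from the same
; clamp-then-truncate idiom seen in the IR above. A minimal scalar sketch of
; that idiom for one lane (not itself part of the test) would be:
;   %lo  = icmp slt i64 %x, 127
;   %min = select i1 %lo, i64 %x, i64 127
;   %hi  = icmp sgt i64 %min, -128
;   %max = select i1 %hi, i64 %min, i64 -128
;   %res = trunc i64 %max to i8
; Clamping to [-128, 127] before the trunc is what makes the truncation
; saturate instead of wrap.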
4097
4098define <16 x i8> @trunc_ssat_v16i64_v16i8(<16 x i64>* %p0) "min-legal-vector-width"="256" {
4099; SSE2-LABEL: trunc_ssat_v16i64_v16i8:
4100; SSE2:       # %bb.0:
4101; SSE2-NEXT:    movdqa (%rdi), %xmm10
4102; SSE2-NEXT:    movdqa 16(%rdi), %xmm9
4103; SSE2-NEXT:    movdqa 32(%rdi), %xmm15
4104; SSE2-NEXT:    movdqa 48(%rdi), %xmm13
4105; SSE2-NEXT:    movdqa 80(%rdi), %xmm6
4106; SSE2-NEXT:    movdqa 64(%rdi), %xmm3
4107; SSE2-NEXT:    movdqa 112(%rdi), %xmm4
4108; SSE2-NEXT:    movdqa 96(%rdi), %xmm7
4109; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
4110; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
4111; SSE2-NEXT:    movdqa %xmm7, %xmm5
4112; SSE2-NEXT:    pxor %xmm1, %xmm5
4113; SSE2-NEXT:    movdqa {{.*#+}} xmm14 = [2147483775,2147483775]
4114; SSE2-NEXT:    movdqa %xmm14, %xmm0
4115; SSE2-NEXT:    pcmpgtd %xmm5, %xmm0
4116; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
4117; SSE2-NEXT:    pcmpeqd %xmm14, %xmm5
4118; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
4119; SSE2-NEXT:    pand %xmm2, %xmm5
4120; SSE2-NEXT:    pshufd {{.*#+}} xmm11 = xmm0[1,1,3,3]
4121; SSE2-NEXT:    por %xmm5, %xmm11
4122; SSE2-NEXT:    pand %xmm11, %xmm7
4123; SSE2-NEXT:    pandn %xmm8, %xmm11
4124; SSE2-NEXT:    por %xmm7, %xmm11
4125; SSE2-NEXT:    movdqa %xmm4, %xmm0
4126; SSE2-NEXT:    pxor %xmm1, %xmm0
4127; SSE2-NEXT:    movdqa %xmm14, %xmm2
4128; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
4129; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2]
4130; SSE2-NEXT:    pcmpeqd %xmm14, %xmm0
4131; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4132; SSE2-NEXT:    pand %xmm5, %xmm0
4133; SSE2-NEXT:    pshufd {{.*#+}} xmm12 = xmm2[1,1,3,3]
4134; SSE2-NEXT:    por %xmm0, %xmm12
4135; SSE2-NEXT:    pand %xmm12, %xmm4
4136; SSE2-NEXT:    pandn %xmm8, %xmm12
4137; SSE2-NEXT:    por %xmm4, %xmm12
4138; SSE2-NEXT:    movdqa %xmm3, %xmm0
4139; SSE2-NEXT:    pxor %xmm1, %xmm0
4140; SSE2-NEXT:    movdqa %xmm14, %xmm2
4141; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
4142; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
4143; SSE2-NEXT:    pcmpeqd %xmm14, %xmm0
4144; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4145; SSE2-NEXT:    pand %xmm4, %xmm0
4146; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
4147; SSE2-NEXT:    por %xmm0, %xmm4
4148; SSE2-NEXT:    pand %xmm4, %xmm3
4149; SSE2-NEXT:    pandn %xmm8, %xmm4
4150; SSE2-NEXT:    por %xmm3, %xmm4
4151; SSE2-NEXT:    movdqa %xmm6, %xmm0
4152; SSE2-NEXT:    pxor %xmm1, %xmm0
4153; SSE2-NEXT:    movdqa %xmm14, %xmm2
4154; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
4155; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
4156; SSE2-NEXT:    pcmpeqd %xmm14, %xmm0
4157; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4158; SSE2-NEXT:    pand %xmm3, %xmm0
4159; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
4160; SSE2-NEXT:    por %xmm0, %xmm5
4161; SSE2-NEXT:    pand %xmm5, %xmm6
4162; SSE2-NEXT:    pandn %xmm8, %xmm5
4163; SSE2-NEXT:    por %xmm6, %xmm5
4164; SSE2-NEXT:    movdqa %xmm15, %xmm0
4165; SSE2-NEXT:    pxor %xmm1, %xmm0
4166; SSE2-NEXT:    movdqa %xmm14, %xmm2
4167; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
4168; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
4169; SSE2-NEXT:    pcmpeqd %xmm14, %xmm0
4170; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4171; SSE2-NEXT:    pand %xmm3, %xmm0
4172; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
4173; SSE2-NEXT:    por %xmm0, %xmm6
4174; SSE2-NEXT:    pand %xmm6, %xmm15
4175; SSE2-NEXT:    pandn %xmm8, %xmm6
4176; SSE2-NEXT:    por %xmm15, %xmm6
4177; SSE2-NEXT:    movdqa %xmm13, %xmm0
4178; SSE2-NEXT:    pxor %xmm1, %xmm0
4179; SSE2-NEXT:    movdqa %xmm14, %xmm2
4180; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
4181; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
4182; SSE2-NEXT:    pcmpeqd %xmm14, %xmm0
4183; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4184; SSE2-NEXT:    pand %xmm3, %xmm0
4185; SSE2-NEXT:    pshufd {{.*#+}} xmm15 = xmm2[1,1,3,3]
4186; SSE2-NEXT:    por %xmm0, %xmm15
4187; SSE2-NEXT:    pand %xmm15, %xmm13
4188; SSE2-NEXT:    pandn %xmm8, %xmm15
4189; SSE2-NEXT:    por %xmm13, %xmm15
4190; SSE2-NEXT:    movdqa %xmm10, %xmm0
4191; SSE2-NEXT:    pxor %xmm1, %xmm0
4192; SSE2-NEXT:    movdqa %xmm14, %xmm3
4193; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
4194; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[0,0,2,2]
4195; SSE2-NEXT:    pcmpeqd %xmm14, %xmm0
4196; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4197; SSE2-NEXT:    pand %xmm7, %xmm0
4198; SSE2-NEXT:    pshufd {{.*#+}} xmm13 = xmm3[1,1,3,3]
4199; SSE2-NEXT:    por %xmm0, %xmm13
4200; SSE2-NEXT:    pand %xmm13, %xmm10
4201; SSE2-NEXT:    pandn %xmm8, %xmm13
4202; SSE2-NEXT:    por %xmm10, %xmm13
4203; SSE2-NEXT:    movdqa %xmm9, %xmm0
4204; SSE2-NEXT:    pxor %xmm1, %xmm0
4205; SSE2-NEXT:    movdqa %xmm14, %xmm7
4206; SSE2-NEXT:    pcmpgtd %xmm0, %xmm7
4207; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
4208; SSE2-NEXT:    pcmpeqd %xmm14, %xmm0
4209; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4210; SSE2-NEXT:    pand %xmm10, %xmm0
4211; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
4212; SSE2-NEXT:    por %xmm0, %xmm7
4213; SSE2-NEXT:    pand %xmm7, %xmm9
4214; SSE2-NEXT:    pandn %xmm8, %xmm7
4215; SSE2-NEXT:    por %xmm9, %xmm7
4216; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
4217; SSE2-NEXT:    movdqa %xmm7, %xmm0
4218; SSE2-NEXT:    pxor %xmm1, %xmm0
4219; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840]
4220; SSE2-NEXT:    movdqa %xmm0, %xmm2
4221; SSE2-NEXT:    pcmpgtd %xmm9, %xmm2
4222; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm2[0,0,2,2]
4223; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
4224; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4225; SSE2-NEXT:    pand %xmm10, %xmm0
4226; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
4227; SSE2-NEXT:    por %xmm0, %xmm2
4228; SSE2-NEXT:    pand %xmm2, %xmm7
4229; SSE2-NEXT:    pandn %xmm8, %xmm2
4230; SSE2-NEXT:    por %xmm7, %xmm2
4231; SSE2-NEXT:    movdqa %xmm13, %xmm0
4232; SSE2-NEXT:    pxor %xmm1, %xmm0
4233; SSE2-NEXT:    movdqa %xmm0, %xmm7
4234; SSE2-NEXT:    pcmpgtd %xmm9, %xmm7
4235; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
4236; SSE2-NEXT:    pcmpeqd %xmm9, %xmm0
4237; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
4238; SSE2-NEXT:    pand %xmm10, %xmm3
4239; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
4240; SSE2-NEXT:    por %xmm3, %xmm0
4241; SSE2-NEXT:    pand %xmm0, %xmm13
4242; SSE2-NEXT:    pandn %xmm8, %xmm0
4243; SSE2-NEXT:    por %xmm13, %xmm0
4244; SSE2-NEXT:    packssdw %xmm2, %xmm0
4245; SSE2-NEXT:    movdqa %xmm15, %xmm2
4246; SSE2-NEXT:    pxor %xmm1, %xmm2
4247; SSE2-NEXT:    movdqa %xmm2, %xmm3
4248; SSE2-NEXT:    pcmpgtd %xmm9, %xmm3
4249; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[0,0,2,2]
4250; SSE2-NEXT:    pcmpeqd %xmm9, %xmm2
4251; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
4252; SSE2-NEXT:    pand %xmm7, %xmm2
4253; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
4254; SSE2-NEXT:    por %xmm2, %xmm3
4255; SSE2-NEXT:    pand %xmm3, %xmm15
4256; SSE2-NEXT:    pandn %xmm8, %xmm3
4257; SSE2-NEXT:    por %xmm15, %xmm3
4258; SSE2-NEXT:    movdqa %xmm6, %xmm2
4259; SSE2-NEXT:    pxor %xmm1, %xmm2
4260; SSE2-NEXT:    movdqa %xmm2, %xmm7
4261; SSE2-NEXT:    pcmpgtd %xmm9, %xmm7
4262; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
4263; SSE2-NEXT:    pcmpeqd %xmm9, %xmm2
4264; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
4265; SSE2-NEXT:    pand %xmm10, %xmm2
4266; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
4267; SSE2-NEXT:    por %xmm2, %xmm7
4268; SSE2-NEXT:    pand %xmm7, %xmm6
4269; SSE2-NEXT:    pandn %xmm8, %xmm7
4270; SSE2-NEXT:    por %xmm6, %xmm7
4271; SSE2-NEXT:    packssdw %xmm3, %xmm7
4272; SSE2-NEXT:    packssdw %xmm7, %xmm0
4273; SSE2-NEXT:    movdqa %xmm5, %xmm2
4274; SSE2-NEXT:    pxor %xmm1, %xmm2
4275; SSE2-NEXT:    movdqa %xmm2, %xmm3
4276; SSE2-NEXT:    pcmpgtd %xmm9, %xmm3
4277; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
4278; SSE2-NEXT:    pcmpeqd %xmm9, %xmm2
4279; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
4280; SSE2-NEXT:    pand %xmm6, %xmm2
4281; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
4282; SSE2-NEXT:    por %xmm2, %xmm3
4283; SSE2-NEXT:    pand %xmm3, %xmm5
4284; SSE2-NEXT:    pandn %xmm8, %xmm3
4285; SSE2-NEXT:    por %xmm5, %xmm3
4286; SSE2-NEXT:    movdqa %xmm4, %xmm2
4287; SSE2-NEXT:    pxor %xmm1, %xmm2
4288; SSE2-NEXT:    movdqa %xmm2, %xmm5
4289; SSE2-NEXT:    pcmpgtd %xmm9, %xmm5
4290; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
4291; SSE2-NEXT:    pcmpeqd %xmm9, %xmm2
4292; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
4293; SSE2-NEXT:    pand %xmm6, %xmm7
4294; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
4295; SSE2-NEXT:    por %xmm7, %xmm2
4296; SSE2-NEXT:    pand %xmm2, %xmm4
4297; SSE2-NEXT:    pandn %xmm8, %xmm2
4298; SSE2-NEXT:    por %xmm4, %xmm2
4299; SSE2-NEXT:    packssdw %xmm3, %xmm2
4300; SSE2-NEXT:    movdqa %xmm12, %xmm3
4301; SSE2-NEXT:    pxor %xmm1, %xmm3
4302; SSE2-NEXT:    movdqa %xmm3, %xmm4
4303; SSE2-NEXT:    pcmpgtd %xmm9, %xmm4
4304; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
4305; SSE2-NEXT:    pcmpeqd %xmm9, %xmm3
4306; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
4307; SSE2-NEXT:    pand %xmm5, %xmm3
4308; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
4309; SSE2-NEXT:    por %xmm3, %xmm4
4310; SSE2-NEXT:    pand %xmm4, %xmm12
4311; SSE2-NEXT:    pandn %xmm8, %xmm4
4312; SSE2-NEXT:    por %xmm12, %xmm4
4313; SSE2-NEXT:    pxor %xmm11, %xmm1
4314; SSE2-NEXT:    movdqa %xmm1, %xmm3
4315; SSE2-NEXT:    pcmpgtd %xmm9, %xmm3
4316; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
4317; SSE2-NEXT:    pcmpeqd %xmm9, %xmm1
4318; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
4319; SSE2-NEXT:    pand %xmm5, %xmm1
4320; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
4321; SSE2-NEXT:    por %xmm1, %xmm3
4322; SSE2-NEXT:    pand %xmm3, %xmm11
4323; SSE2-NEXT:    pandn %xmm8, %xmm3
4324; SSE2-NEXT:    por %xmm11, %xmm3
4325; SSE2-NEXT:    packssdw %xmm4, %xmm3
4326; SSE2-NEXT:    packssdw %xmm3, %xmm2
4327; SSE2-NEXT:    packsswb %xmm2, %xmm0
4328; SSE2-NEXT:    retq
4329;
4330; SSSE3-LABEL: trunc_ssat_v16i64_v16i8:
4331; SSSE3:       # %bb.0:
4332; SSSE3-NEXT:    movdqa (%rdi), %xmm10
4333; SSSE3-NEXT:    movdqa 16(%rdi), %xmm9
4334; SSSE3-NEXT:    movdqa 32(%rdi), %xmm15
4335; SSSE3-NEXT:    movdqa 48(%rdi), %xmm13
4336; SSSE3-NEXT:    movdqa 80(%rdi), %xmm6
4337; SSSE3-NEXT:    movdqa 64(%rdi), %xmm3
4338; SSSE3-NEXT:    movdqa 112(%rdi), %xmm4
4339; SSSE3-NEXT:    movdqa 96(%rdi), %xmm7
4340; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [127,127]
4341; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
4342; SSSE3-NEXT:    movdqa %xmm7, %xmm5
4343; SSSE3-NEXT:    pxor %xmm1, %xmm5
4344; SSSE3-NEXT:    movdqa {{.*#+}} xmm14 = [2147483775,2147483775]
4345; SSSE3-NEXT:    movdqa %xmm14, %xmm0
4346; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm0
4347; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
4348; SSSE3-NEXT:    pcmpeqd %xmm14, %xmm5
4349; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
4350; SSSE3-NEXT:    pand %xmm2, %xmm5
4351; SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm0[1,1,3,3]
4352; SSSE3-NEXT:    por %xmm5, %xmm11
4353; SSSE3-NEXT:    pand %xmm11, %xmm7
4354; SSSE3-NEXT:    pandn %xmm8, %xmm11
4355; SSSE3-NEXT:    por %xmm7, %xmm11
4356; SSSE3-NEXT:    movdqa %xmm4, %xmm0
4357; SSSE3-NEXT:    pxor %xmm1, %xmm0
4358; SSSE3-NEXT:    movdqa %xmm14, %xmm2
4359; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
4360; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2]
4361; SSSE3-NEXT:    pcmpeqd %xmm14, %xmm0
4362; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4363; SSSE3-NEXT:    pand %xmm5, %xmm0
4364; SSSE3-NEXT:    pshufd {{.*#+}} xmm12 = xmm2[1,1,3,3]
4365; SSSE3-NEXT:    por %xmm0, %xmm12
4366; SSSE3-NEXT:    pand %xmm12, %xmm4
4367; SSSE3-NEXT:    pandn %xmm8, %xmm12
4368; SSSE3-NEXT:    por %xmm4, %xmm12
4369; SSSE3-NEXT:    movdqa %xmm3, %xmm0
4370; SSSE3-NEXT:    pxor %xmm1, %xmm0
4371; SSSE3-NEXT:    movdqa %xmm14, %xmm2
4372; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
4373; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
4374; SSSE3-NEXT:    pcmpeqd %xmm14, %xmm0
4375; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4376; SSSE3-NEXT:    pand %xmm4, %xmm0
4377; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
4378; SSSE3-NEXT:    por %xmm0, %xmm4
4379; SSSE3-NEXT:    pand %xmm4, %xmm3
4380; SSSE3-NEXT:    pandn %xmm8, %xmm4
4381; SSSE3-NEXT:    por %xmm3, %xmm4
4382; SSSE3-NEXT:    movdqa %xmm6, %xmm0
4383; SSSE3-NEXT:    pxor %xmm1, %xmm0
4384; SSSE3-NEXT:    movdqa %xmm14, %xmm2
4385; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
4386; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
4387; SSSE3-NEXT:    pcmpeqd %xmm14, %xmm0
4388; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4389; SSSE3-NEXT:    pand %xmm3, %xmm0
4390; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
4391; SSSE3-NEXT:    por %xmm0, %xmm5
4392; SSSE3-NEXT:    pand %xmm5, %xmm6
4393; SSSE3-NEXT:    pandn %xmm8, %xmm5
4394; SSSE3-NEXT:    por %xmm6, %xmm5
4395; SSSE3-NEXT:    movdqa %xmm15, %xmm0
4396; SSSE3-NEXT:    pxor %xmm1, %xmm0
4397; SSSE3-NEXT:    movdqa %xmm14, %xmm2
4398; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
4399; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
4400; SSSE3-NEXT:    pcmpeqd %xmm14, %xmm0
4401; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4402; SSSE3-NEXT:    pand %xmm3, %xmm0
4403; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
4404; SSSE3-NEXT:    por %xmm0, %xmm6
4405; SSSE3-NEXT:    pand %xmm6, %xmm15
4406; SSSE3-NEXT:    pandn %xmm8, %xmm6
4407; SSSE3-NEXT:    por %xmm15, %xmm6
4408; SSSE3-NEXT:    movdqa %xmm13, %xmm0
4409; SSSE3-NEXT:    pxor %xmm1, %xmm0
4410; SSSE3-NEXT:    movdqa %xmm14, %xmm2
4411; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
4412; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
4413; SSSE3-NEXT:    pcmpeqd %xmm14, %xmm0
4414; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4415; SSSE3-NEXT:    pand %xmm3, %xmm0
4416; SSSE3-NEXT:    pshufd {{.*#+}} xmm15 = xmm2[1,1,3,3]
4417; SSSE3-NEXT:    por %xmm0, %xmm15
4418; SSSE3-NEXT:    pand %xmm15, %xmm13
4419; SSSE3-NEXT:    pandn %xmm8, %xmm15
4420; SSSE3-NEXT:    por %xmm13, %xmm15
4421; SSSE3-NEXT:    movdqa %xmm10, %xmm0
4422; SSSE3-NEXT:    pxor %xmm1, %xmm0
4423; SSSE3-NEXT:    movdqa %xmm14, %xmm3
4424; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm3
4425; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[0,0,2,2]
4426; SSSE3-NEXT:    pcmpeqd %xmm14, %xmm0
4427; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4428; SSSE3-NEXT:    pand %xmm7, %xmm0
4429; SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm3[1,1,3,3]
4430; SSSE3-NEXT:    por %xmm0, %xmm13
4431; SSSE3-NEXT:    pand %xmm13, %xmm10
4432; SSSE3-NEXT:    pandn %xmm8, %xmm13
4433; SSSE3-NEXT:    por %xmm10, %xmm13
4434; SSSE3-NEXT:    movdqa %xmm9, %xmm0
4435; SSSE3-NEXT:    pxor %xmm1, %xmm0
4436; SSSE3-NEXT:    movdqa %xmm14, %xmm7
4437; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm7
4438; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
4439; SSSE3-NEXT:    pcmpeqd %xmm14, %xmm0
4440; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4441; SSSE3-NEXT:    pand %xmm10, %xmm0
4442; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
4443; SSSE3-NEXT:    por %xmm0, %xmm7
4444; SSSE3-NEXT:    pand %xmm7, %xmm9
4445; SSSE3-NEXT:    pandn %xmm8, %xmm7
4446; SSSE3-NEXT:    por %xmm9, %xmm7
4447; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488]
4448; SSSE3-NEXT:    movdqa %xmm7, %xmm0
4449; SSSE3-NEXT:    pxor %xmm1, %xmm0
4450; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840]
4451; SSSE3-NEXT:    movdqa %xmm0, %xmm2
4452; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm2
4453; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm2[0,0,2,2]
4454; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
4455; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
4456; SSSE3-NEXT:    pand %xmm10, %xmm0
4457; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
4458; SSSE3-NEXT:    por %xmm0, %xmm2
4459; SSSE3-NEXT:    pand %xmm2, %xmm7
4460; SSSE3-NEXT:    pandn %xmm8, %xmm2
4461; SSSE3-NEXT:    por %xmm7, %xmm2
4462; SSSE3-NEXT:    movdqa %xmm13, %xmm0
4463; SSSE3-NEXT:    pxor %xmm1, %xmm0
4464; SSSE3-NEXT:    movdqa %xmm0, %xmm7
4465; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm7
4466; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
4467; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm0
4468; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
4469; SSSE3-NEXT:    pand %xmm10, %xmm3
4470; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
4471; SSSE3-NEXT:    por %xmm3, %xmm0
4472; SSSE3-NEXT:    pand %xmm0, %xmm13
4473; SSSE3-NEXT:    pandn %xmm8, %xmm0
4474; SSSE3-NEXT:    por %xmm13, %xmm0
4475; SSSE3-NEXT:    packssdw %xmm2, %xmm0
4476; SSSE3-NEXT:    movdqa %xmm15, %xmm2
4477; SSSE3-NEXT:    pxor %xmm1, %xmm2
4478; SSSE3-NEXT:    movdqa %xmm2, %xmm3
4479; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm3
4480; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[0,0,2,2]
4481; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm2
4482; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
4483; SSSE3-NEXT:    pand %xmm7, %xmm2
4484; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
4485; SSSE3-NEXT:    por %xmm2, %xmm3
4486; SSSE3-NEXT:    pand %xmm3, %xmm15
4487; SSSE3-NEXT:    pandn %xmm8, %xmm3
4488; SSSE3-NEXT:    por %xmm15, %xmm3
4489; SSSE3-NEXT:    movdqa %xmm6, %xmm2
4490; SSSE3-NEXT:    pxor %xmm1, %xmm2
4491; SSSE3-NEXT:    movdqa %xmm2, %xmm7
4492; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm7
4493; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
4494; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm2
4495; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
4496; SSSE3-NEXT:    pand %xmm10, %xmm2
4497; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
4498; SSSE3-NEXT:    por %xmm2, %xmm7
4499; SSSE3-NEXT:    pand %xmm7, %xmm6
4500; SSSE3-NEXT:    pandn %xmm8, %xmm7
4501; SSSE3-NEXT:    por %xmm6, %xmm7
4502; SSSE3-NEXT:    packssdw %xmm3, %xmm7
4503; SSSE3-NEXT:    packssdw %xmm7, %xmm0
4504; SSSE3-NEXT:    movdqa %xmm5, %xmm2
4505; SSSE3-NEXT:    pxor %xmm1, %xmm2
4506; SSSE3-NEXT:    movdqa %xmm2, %xmm3
4507; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm3
4508; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
4509; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm2
4510; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
4511; SSSE3-NEXT:    pand %xmm6, %xmm2
4512; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
4513; SSSE3-NEXT:    por %xmm2, %xmm3
4514; SSSE3-NEXT:    pand %xmm3, %xmm5
4515; SSSE3-NEXT:    pandn %xmm8, %xmm3
4516; SSSE3-NEXT:    por %xmm5, %xmm3
4517; SSSE3-NEXT:    movdqa %xmm4, %xmm2
4518; SSSE3-NEXT:    pxor %xmm1, %xmm2
4519; SSSE3-NEXT:    movdqa %xmm2, %xmm5
4520; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm5
4521; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
4522; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm2
4523; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
4524; SSSE3-NEXT:    pand %xmm6, %xmm7
4525; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
4526; SSSE3-NEXT:    por %xmm7, %xmm2
4527; SSSE3-NEXT:    pand %xmm2, %xmm4
4528; SSSE3-NEXT:    pandn %xmm8, %xmm2
4529; SSSE3-NEXT:    por %xmm4, %xmm2
4530; SSSE3-NEXT:    packssdw %xmm3, %xmm2
4531; SSSE3-NEXT:    movdqa %xmm12, %xmm3
4532; SSSE3-NEXT:    pxor %xmm1, %xmm3
4533; SSSE3-NEXT:    movdqa %xmm3, %xmm4
4534; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm4
4535; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
4536; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm3
4537; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
4538; SSSE3-NEXT:    pand %xmm5, %xmm3
4539; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
4540; SSSE3-NEXT:    por %xmm3, %xmm4
4541; SSSE3-NEXT:    pand %xmm4, %xmm12
4542; SSSE3-NEXT:    pandn %xmm8, %xmm4
4543; SSSE3-NEXT:    por %xmm12, %xmm4
4544; SSSE3-NEXT:    pxor %xmm11, %xmm1
4545; SSSE3-NEXT:    movdqa %xmm1, %xmm3
4546; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm3
4547; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
4548; SSSE3-NEXT:    pcmpeqd %xmm9, %xmm1
4549; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
4550; SSSE3-NEXT:    pand %xmm5, %xmm1
4551; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
4552; SSSE3-NEXT:    por %xmm1, %xmm3
4553; SSSE3-NEXT:    pand %xmm3, %xmm11
4554; SSSE3-NEXT:    pandn %xmm8, %xmm3
4555; SSSE3-NEXT:    por %xmm11, %xmm3
4556; SSSE3-NEXT:    packssdw %xmm4, %xmm3
4557; SSSE3-NEXT:    packssdw %xmm3, %xmm2
4558; SSSE3-NEXT:    packsswb %xmm2, %xmm0
4559; SSSE3-NEXT:    retq
4560;
4561; SSE41-LABEL: trunc_ssat_v16i64_v16i8:
4562; SSE41:       # %bb.0:
4563; SSE41-NEXT:    movdqa (%rdi), %xmm11
4564; SSE41-NEXT:    movdqa 16(%rdi), %xmm9
4565; SSE41-NEXT:    movdqa 32(%rdi), %xmm15
4566; SSE41-NEXT:    movdqa 48(%rdi), %xmm12
4567; SSE41-NEXT:    movdqa 80(%rdi), %xmm4
4568; SSE41-NEXT:    movdqa 64(%rdi), %xmm14
4569; SSE41-NEXT:    movdqa 112(%rdi), %xmm13
4570; SSE41-NEXT:    movdqa 96(%rdi), %xmm3
4571; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [127,127]
4572; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
4573; SSE41-NEXT:    movdqa %xmm3, %xmm0
4574; SSE41-NEXT:    pxor %xmm2, %xmm0
4575; SSE41-NEXT:    movdqa {{.*#+}} xmm7 = [2147483775,2147483775]
4576; SSE41-NEXT:    movdqa %xmm7, %xmm5
4577; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
4578; SSE41-NEXT:    movdqa %xmm7, %xmm6
4579; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
4580; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
4581; SSE41-NEXT:    pand %xmm5, %xmm0
4582; SSE41-NEXT:    por %xmm6, %xmm0
4583; SSE41-NEXT:    movapd %xmm1, %xmm8
4584; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm8
4585; SSE41-NEXT:    movdqa %xmm13, %xmm0
4586; SSE41-NEXT:    pxor %xmm2, %xmm0
4587; SSE41-NEXT:    movdqa %xmm7, %xmm3
4588; SSE41-NEXT:    pcmpeqd %xmm0, %xmm3
4589; SSE41-NEXT:    movdqa %xmm7, %xmm5
4590; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
4591; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
4592; SSE41-NEXT:    pand %xmm3, %xmm0
4593; SSE41-NEXT:    por %xmm5, %xmm0
4594; SSE41-NEXT:    movapd %xmm1, %xmm10
4595; SSE41-NEXT:    blendvpd %xmm0, %xmm13, %xmm10
4596; SSE41-NEXT:    movdqa %xmm14, %xmm0
4597; SSE41-NEXT:    pxor %xmm2, %xmm0
4598; SSE41-NEXT:    movdqa %xmm7, %xmm3
4599; SSE41-NEXT:    pcmpeqd %xmm0, %xmm3
4600; SSE41-NEXT:    movdqa %xmm7, %xmm5
4601; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
4602; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
4603; SSE41-NEXT:    pand %xmm3, %xmm0
4604; SSE41-NEXT:    por %xmm5, %xmm0
4605; SSE41-NEXT:    movapd %xmm1, %xmm13
4606; SSE41-NEXT:    blendvpd %xmm0, %xmm14, %xmm13
4607; SSE41-NEXT:    movdqa %xmm4, %xmm0
4608; SSE41-NEXT:    pxor %xmm2, %xmm0
4609; SSE41-NEXT:    movdqa %xmm7, %xmm3
4610; SSE41-NEXT:    pcmpeqd %xmm0, %xmm3
4611; SSE41-NEXT:    movdqa %xmm7, %xmm5
4612; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
4613; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
4614; SSE41-NEXT:    pand %xmm3, %xmm0
4615; SSE41-NEXT:    por %xmm5, %xmm0
4616; SSE41-NEXT:    movapd %xmm1, %xmm14
4617; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm14
4618; SSE41-NEXT:    movdqa %xmm15, %xmm0
4619; SSE41-NEXT:    pxor %xmm2, %xmm0
4620; SSE41-NEXT:    movdqa %xmm7, %xmm3
4621; SSE41-NEXT:    pcmpeqd %xmm0, %xmm3
4622; SSE41-NEXT:    movdqa %xmm7, %xmm4
4623; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
4624; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
4625; SSE41-NEXT:    pand %xmm3, %xmm0
4626; SSE41-NEXT:    por %xmm4, %xmm0
4627; SSE41-NEXT:    movapd %xmm1, %xmm4
4628; SSE41-NEXT:    blendvpd %xmm0, %xmm15, %xmm4
4629; SSE41-NEXT:    movdqa %xmm12, %xmm0
4630; SSE41-NEXT:    pxor %xmm2, %xmm0
4631; SSE41-NEXT:    movdqa %xmm7, %xmm3
4632; SSE41-NEXT:    pcmpeqd %xmm0, %xmm3
4633; SSE41-NEXT:    movdqa %xmm7, %xmm5
4634; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
4635; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
4636; SSE41-NEXT:    pand %xmm3, %xmm0
4637; SSE41-NEXT:    por %xmm5, %xmm0
4638; SSE41-NEXT:    movapd %xmm1, %xmm15
4639; SSE41-NEXT:    blendvpd %xmm0, %xmm12, %xmm15
4640; SSE41-NEXT:    movdqa %xmm11, %xmm0
4641; SSE41-NEXT:    pxor %xmm2, %xmm0
4642; SSE41-NEXT:    movdqa %xmm7, %xmm3
4643; SSE41-NEXT:    pcmpeqd %xmm0, %xmm3
4644; SSE41-NEXT:    movdqa %xmm7, %xmm6
4645; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
4646; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
4647; SSE41-NEXT:    pand %xmm3, %xmm0
4648; SSE41-NEXT:    por %xmm6, %xmm0
4649; SSE41-NEXT:    movapd %xmm1, %xmm6
4650; SSE41-NEXT:    blendvpd %xmm0, %xmm11, %xmm6
4651; SSE41-NEXT:    movdqa %xmm9, %xmm0
4652; SSE41-NEXT:    pxor %xmm2, %xmm0
4653; SSE41-NEXT:    movdqa %xmm7, %xmm3
4654; SSE41-NEXT:    pcmpeqd %xmm0, %xmm3
4655; SSE41-NEXT:    pcmpgtd %xmm0, %xmm7
4656; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
4657; SSE41-NEXT:    pand %xmm3, %xmm0
4658; SSE41-NEXT:    por %xmm7, %xmm0
4659; SSE41-NEXT:    blendvpd %xmm0, %xmm9, %xmm1
4660; SSE41-NEXT:    movapd {{.*#+}} xmm7 = [18446744073709551488,18446744073709551488]
4661; SSE41-NEXT:    movapd %xmm1, %xmm5
4662; SSE41-NEXT:    xorpd %xmm2, %xmm5
4663; SSE41-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840]
4664; SSE41-NEXT:    movapd %xmm5, %xmm3
4665; SSE41-NEXT:    pcmpeqd %xmm9, %xmm3
4666; SSE41-NEXT:    pcmpgtd %xmm9, %xmm5
4667; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
4668; SSE41-NEXT:    pand %xmm3, %xmm0
4669; SSE41-NEXT:    por %xmm5, %xmm0
4670; SSE41-NEXT:    movapd %xmm7, %xmm3
4671; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
4672; SSE41-NEXT:    movapd %xmm6, %xmm1
4673; SSE41-NEXT:    xorpd %xmm2, %xmm1
4674; SSE41-NEXT:    movapd %xmm1, %xmm5
4675; SSE41-NEXT:    pcmpeqd %xmm9, %xmm5
4676; SSE41-NEXT:    pcmpgtd %xmm9, %xmm1
4677; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
4678; SSE41-NEXT:    pand %xmm5, %xmm0
4679; SSE41-NEXT:    por %xmm1, %xmm0
4680; SSE41-NEXT:    movapd %xmm7, %xmm1
4681; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm1
4682; SSE41-NEXT:    packssdw %xmm3, %xmm1
4683; SSE41-NEXT:    movapd %xmm15, %xmm3
4684; SSE41-NEXT:    xorpd %xmm2, %xmm3
4685; SSE41-NEXT:    movapd %xmm3, %xmm5
4686; SSE41-NEXT:    pcmpeqd %xmm9, %xmm5
4687; SSE41-NEXT:    pcmpgtd %xmm9, %xmm3
4688; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
4689; SSE41-NEXT:    pand %xmm5, %xmm0
4690; SSE41-NEXT:    por %xmm3, %xmm0
4691; SSE41-NEXT:    movapd %xmm7, %xmm3
4692; SSE41-NEXT:    blendvpd %xmm0, %xmm15, %xmm3
4693; SSE41-NEXT:    movapd %xmm4, %xmm5
4694; SSE41-NEXT:    xorpd %xmm2, %xmm5
4695; SSE41-NEXT:    movapd %xmm5, %xmm6
4696; SSE41-NEXT:    pcmpeqd %xmm9, %xmm6
4697; SSE41-NEXT:    pcmpgtd %xmm9, %xmm5
4698; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
4699; SSE41-NEXT:    pand %xmm6, %xmm0
4700; SSE41-NEXT:    por %xmm5, %xmm0
4701; SSE41-NEXT:    movapd %xmm7, %xmm5
4702; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm5
4703; SSE41-NEXT:    packssdw %xmm3, %xmm5
4704; SSE41-NEXT:    packssdw %xmm5, %xmm1
4705; SSE41-NEXT:    movapd %xmm14, %xmm3
4706; SSE41-NEXT:    xorpd %xmm2, %xmm3
4707; SSE41-NEXT:    movapd %xmm3, %xmm4
4708; SSE41-NEXT:    pcmpeqd %xmm9, %xmm4
4709; SSE41-NEXT:    pcmpgtd %xmm9, %xmm3
4710; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
4711; SSE41-NEXT:    pand %xmm4, %xmm0
4712; SSE41-NEXT:    por %xmm3, %xmm0
4713; SSE41-NEXT:    movapd %xmm7, %xmm3
4714; SSE41-NEXT:    blendvpd %xmm0, %xmm14, %xmm3
4715; SSE41-NEXT:    movapd %xmm13, %xmm4
4716; SSE41-NEXT:    xorpd %xmm2, %xmm4
4717; SSE41-NEXT:    movapd %xmm4, %xmm5
4718; SSE41-NEXT:    pcmpeqd %xmm9, %xmm5
4719; SSE41-NEXT:    pcmpgtd %xmm9, %xmm4
4720; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
4721; SSE41-NEXT:    pand %xmm5, %xmm0
4722; SSE41-NEXT:    por %xmm4, %xmm0
4723; SSE41-NEXT:    movapd %xmm7, %xmm4
4724; SSE41-NEXT:    blendvpd %xmm0, %xmm13, %xmm4
4725; SSE41-NEXT:    packssdw %xmm3, %xmm4
4726; SSE41-NEXT:    movapd %xmm10, %xmm3
4727; SSE41-NEXT:    xorpd %xmm2, %xmm3
4728; SSE41-NEXT:    movapd %xmm3, %xmm5
4729; SSE41-NEXT:    pcmpeqd %xmm9, %xmm5
4730; SSE41-NEXT:    pcmpgtd %xmm9, %xmm3
4731; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
4732; SSE41-NEXT:    pand %xmm5, %xmm0
4733; SSE41-NEXT:    por %xmm3, %xmm0
4734; SSE41-NEXT:    movapd %xmm7, %xmm3
4735; SSE41-NEXT:    blendvpd %xmm0, %xmm10, %xmm3
4736; SSE41-NEXT:    xorpd %xmm8, %xmm2
4737; SSE41-NEXT:    movapd %xmm2, %xmm5
4738; SSE41-NEXT:    pcmpeqd %xmm9, %xmm5
4739; SSE41-NEXT:    pcmpgtd %xmm9, %xmm2
4740; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
4741; SSE41-NEXT:    pand %xmm5, %xmm0
4742; SSE41-NEXT:    por %xmm2, %xmm0
4743; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm7
4744; SSE41-NEXT:    packssdw %xmm3, %xmm7
4745; SSE41-NEXT:    packssdw %xmm7, %xmm4
4746; SSE41-NEXT:    packsswb %xmm4, %xmm1
4747; SSE41-NEXT:    movdqa %xmm1, %xmm0
4748; SSE41-NEXT:    retq
4749;
4750; AVX1-LABEL: trunc_ssat_v16i64_v16i8:
4751; AVX1:       # %bb.0:
4752; AVX1-NEXT:    vmovdqa 112(%rdi), %xmm8
4753; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127]
4754; AVX1-NEXT:    vmovdqa 96(%rdi), %xmm9
4755; AVX1-NEXT:    vmovdqa 80(%rdi), %xmm3
4756; AVX1-NEXT:    vmovdqa 64(%rdi), %xmm4
4757; AVX1-NEXT:    vmovdqa (%rdi), %xmm5
4758; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm6
4759; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm7
4760; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm0
4761; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm1, %xmm2
4762; AVX1-NEXT:    vblendvpd %xmm2, %xmm5, %xmm1, %xmm10
4763; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm1, %xmm5
4764; AVX1-NEXT:    vblendvpd %xmm5, %xmm6, %xmm1, %xmm11
4765; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm1, %xmm6
4766; AVX1-NEXT:    vblendvpd %xmm6, %xmm7, %xmm1, %xmm6
4767; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm7
4768; AVX1-NEXT:    vblendvpd %xmm7, %xmm0, %xmm1, %xmm0
4769; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm7
4770; AVX1-NEXT:    vblendvpd %xmm7, %xmm4, %xmm1, %xmm4
4771; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm7
4772; AVX1-NEXT:    vblendvpd %xmm7, %xmm3, %xmm1, %xmm3
4773; AVX1-NEXT:    vpcmpgtq %xmm9, %xmm1, %xmm7
4774; AVX1-NEXT:    vblendvpd %xmm7, %xmm9, %xmm1, %xmm7
4775; AVX1-NEXT:    vpcmpgtq %xmm8, %xmm1, %xmm2
4776; AVX1-NEXT:    vblendvpd %xmm2, %xmm8, %xmm1, %xmm1
4777; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [18446744073709551488,18446744073709551488]
4778; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm1, %xmm5
4779; AVX1-NEXT:    vblendvpd %xmm5, %xmm1, %xmm2, %xmm8
4780; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm7, %xmm5
4781; AVX1-NEXT:    vblendvpd %xmm5, %xmm7, %xmm2, %xmm5
4782; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm7
4783; AVX1-NEXT:    vblendvpd %xmm7, %xmm3, %xmm2, %xmm3
4784; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm7
4785; AVX1-NEXT:    vblendvpd %xmm7, %xmm4, %xmm2, %xmm4
4786; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm7
4787; AVX1-NEXT:    vblendvpd %xmm7, %xmm0, %xmm2, %xmm0
4788; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm6, %xmm7
4789; AVX1-NEXT:    vblendvpd %xmm7, %xmm6, %xmm2, %xmm6
4790; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm11, %xmm7
4791; AVX1-NEXT:    vblendvpd %xmm7, %xmm11, %xmm2, %xmm7
4792; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm10, %xmm1
4793; AVX1-NEXT:    vblendvpd %xmm1, %xmm10, %xmm2, %xmm1
4794; AVX1-NEXT:    vpackssdw %xmm8, %xmm5, %xmm2
4795; AVX1-NEXT:    vpackssdw %xmm3, %xmm4, %xmm3
4796; AVX1-NEXT:    vpackssdw %xmm2, %xmm3, %xmm2
4797; AVX1-NEXT:    vpackssdw %xmm0, %xmm6, %xmm0
4798; AVX1-NEXT:    vpackssdw %xmm7, %xmm1, %xmm1
4799; AVX1-NEXT:    vpackssdw %xmm0, %xmm1, %xmm0
4800; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
4801; AVX1-NEXT:    retq
4802;
4803; AVX2-LABEL: trunc_ssat_v16i64_v16i8:
4804; AVX2:       # %bb.0:
4805; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
4806; AVX2-NEXT:    vmovdqa 32(%rdi), %ymm1
4807; AVX2-NEXT:    vmovdqa 64(%rdi), %ymm2
4808; AVX2-NEXT:    vmovdqa 96(%rdi), %ymm3
4809; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [127,127,127,127]
4810; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm4, %ymm5
4811; AVX2-NEXT:    vblendvpd %ymm5, %ymm2, %ymm4, %ymm2
4812; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm5
4813; AVX2-NEXT:    vblendvpd %ymm5, %ymm3, %ymm4, %ymm3
4814; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm4, %ymm5
4815; AVX2-NEXT:    vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
4816; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm4, %ymm5
4817; AVX2-NEXT:    vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
4818; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
4819; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm1, %ymm5
4820; AVX2-NEXT:    vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
4821; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm0, %ymm5
4822; AVX2-NEXT:    vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
4823; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
4824; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm3, %ymm1
4825; AVX2-NEXT:    vblendvpd %ymm1, %ymm3, %ymm4, %ymm1
4826; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm2, %ymm3
4827; AVX2-NEXT:    vblendvpd %ymm3, %ymm2, %ymm4, %ymm2
4828; AVX2-NEXT:    vpackssdw %ymm1, %ymm2, %ymm1
4829; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
4830; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4831; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
4832; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
4833; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
4834; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
4835; AVX2-NEXT:    vzeroupper
4836; AVX2-NEXT:    retq
4837;
4838; AVX512-LABEL: trunc_ssat_v16i64_v16i8:
4839; AVX512:       # %bb.0:
4840; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
4841; AVX512-NEXT:    vmovdqa64 64(%rdi), %zmm1
4842; AVX512-NEXT:    vpmovsqb %zmm1, %xmm1
4843; AVX512-NEXT:    vpmovsqb %zmm0, %xmm0
4844; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4845; AVX512-NEXT:    vzeroupper
4846; AVX512-NEXT:    retq
4847;
4848; SKX-LABEL: trunc_ssat_v16i64_v16i8:
4849; SKX:       # %bb.0:
4850; SKX-NEXT:    vmovdqa (%rdi), %ymm0
4851; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
4852; SKX-NEXT:    vmovdqa 64(%rdi), %ymm2
4853; SKX-NEXT:    vmovdqa 96(%rdi), %ymm3
4854; SKX-NEXT:    vpmovsqb %ymm3, %xmm3
4855; SKX-NEXT:    vpmovsqb %ymm2, %xmm2
4856; SKX-NEXT:    vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
4857; SKX-NEXT:    vpmovsqb %ymm1, %xmm1
4858; SKX-NEXT:    vpmovsqb %ymm0, %xmm0
4859; SKX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
4860; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
4861; SKX-NEXT:    vzeroupper
4862; SKX-NEXT:    retq
4863  %a0 = load <16 x i64>, <16 x i64>* %p0
4864  %1 = icmp slt <16 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
4865  %2 = select <16 x i1> %1, <16 x i64> %a0, <16 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
4866  %3 = icmp sgt <16 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
4867  %4 = select <16 x i1> %3, <16 x i64> %2, <16 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
4868  %5 = trunc <16 x i64> %4 to <16 x i8>
4869  ret <16 x i8> %5
4870}
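; Illustrative note on the pre-AVX lowerings above: SSE2/SSSE3/SSE4.1 have no
; 64-bit signed compare (pcmpgtq is SSE4.2), so the checks emulate one with
; 32-bit operations. Both operands are biased with pxor [2147483648,2147483648]
; (flipping bit 31 of each quadword, i.e. the sign bit of its low dword), the
; clamp constants are pre-biased the same way (e.g. 2147483775 = 127 ^
; 0x80000000), and a 64-bit greater-than is then rebuilt roughly as
;   gt64 = hi_sgt | (hi_eq & lo_ugt)
; using pcmpgtd, pcmpeqd and the pshufd/pand/por combine steps.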
4871
4872define <4 x i8> @trunc_ssat_v4i32_v4i8(<4 x i32> %a0) {
4873; SSE2-LABEL: trunc_ssat_v4i32_v4i8:
4874; SSE2:       # %bb.0:
4875; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [127,127,127,127]
4876; SSE2-NEXT:    movdqa %xmm1, %xmm2
4877; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
4878; SSE2-NEXT:    pand %xmm2, %xmm0
4879; SSE2-NEXT:    pandn %xmm1, %xmm2
4880; SSE2-NEXT:    por %xmm0, %xmm2
4881; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
4882; SSE2-NEXT:    movdqa %xmm2, %xmm0
4883; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
4884; SSE2-NEXT:    pand %xmm0, %xmm2
4885; SSE2-NEXT:    pandn %xmm1, %xmm0
4886; SSE2-NEXT:    por %xmm2, %xmm0
4887; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
4888; SSE2-NEXT:    packuswb %xmm0, %xmm0
4889; SSE2-NEXT:    packuswb %xmm0, %xmm0
4890; SSE2-NEXT:    retq
4891;
4892; SSSE3-LABEL: trunc_ssat_v4i32_v4i8:
4893; SSSE3:       # %bb.0:
4894; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [127,127,127,127]
4895; SSSE3-NEXT:    movdqa %xmm1, %xmm2
4896; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
4897; SSSE3-NEXT:    pand %xmm2, %xmm0
4898; SSSE3-NEXT:    pandn %xmm1, %xmm2
4899; SSSE3-NEXT:    por %xmm0, %xmm2
4900; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
4901; SSSE3-NEXT:    movdqa %xmm2, %xmm0
4902; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm0
4903; SSSE3-NEXT:    pand %xmm0, %xmm2
4904; SSSE3-NEXT:    pandn %xmm1, %xmm0
4905; SSSE3-NEXT:    por %xmm2, %xmm0
4906; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
4907; SSSE3-NEXT:    retq
4908;
4909; SSE41-LABEL: trunc_ssat_v4i32_v4i8:
4910; SSE41:       # %bb.0:
4911; SSE41-NEXT:    pminsd {{.*}}(%rip), %xmm0
4912; SSE41-NEXT:    pmaxsd {{.*}}(%rip), %xmm0
4913; SSE41-NEXT:    packssdw %xmm0, %xmm0
4914; SSE41-NEXT:    packsswb %xmm0, %xmm0
4915; SSE41-NEXT:    retq
4916;
4917; AVX1-LABEL: trunc_ssat_v4i32_v4i8:
4918; AVX1:       # %bb.0:
4919; AVX1-NEXT:    vpminsd {{.*}}(%rip), %xmm0, %xmm0
4920; AVX1-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
4921; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
4922; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
4923; AVX1-NEXT:    retq
4924;
4925; AVX2-LABEL: trunc_ssat_v4i32_v4i8:
4926; AVX2:       # %bb.0:
4927; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
4928; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
4929; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
4930; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
4931; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
4932; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
4933; AVX2-NEXT:    retq
4934;
4935; AVX512F-LABEL: trunc_ssat_v4i32_v4i8:
4936; AVX512F:       # %bb.0:
4937; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
4938; AVX512F-NEXT:    vpmovsdb %zmm0, %xmm0
4939; AVX512F-NEXT:    vzeroupper
4940; AVX512F-NEXT:    retq
4941;
4942; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8:
4943; AVX512VL:       # %bb.0:
4944; AVX512VL-NEXT:    vpmovsdb %xmm0, %xmm0
4945; AVX512VL-NEXT:    retq
4946;
4947; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8:
4948; AVX512BW:       # %bb.0:
4949; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
4950; AVX512BW-NEXT:    vpmovsdb %zmm0, %xmm0
4951; AVX512BW-NEXT:    vzeroupper
4952; AVX512BW-NEXT:    retq
4953;
4954; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8:
4955; AVX512BWVL:       # %bb.0:
4956; AVX512BWVL-NEXT:    vpmovsdb %xmm0, %xmm0
4957; AVX512BWVL-NEXT:    retq
4958;
4959; SKX-LABEL: trunc_ssat_v4i32_v4i8:
4960; SKX:       # %bb.0:
4961; SKX-NEXT:    vpmovsdb %xmm0, %xmm0
4962; SKX-NEXT:    retq
4963  %1 = icmp slt <4 x i32> %a0, <i32 127, i32 127, i32 127, i32 127>
4964  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 127, i32 127, i32 127, i32 127>
4965  %3 = icmp sgt <4 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128>
4966  %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>
4967  %5 = trunc <4 x i32> %4 to <4 x i8>
4968  ret <4 x i8> %5
4969}
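; For illustration: with %a0 = <i32 300, i32 -200, i32 5, i32 -5> the clamp
; above yields <i32 127, i32 -128, i32 5, i32 -5>, so the function returns
; <i8 127, i8 -128, i8 5, i8 -5>; in-range lanes pass through unchanged while
; out-of-range lanes saturate.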
4970
define void @trunc_ssat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) {
; SSE2-LABEL: trunc_ssat_v4i32_v4i8_store:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [127,127,127,127]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [4294967168,4294967168,4294967168,4294967168]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    packuswb %xmm1, %xmm1
; SSE2-NEXT:    packuswb %xmm1, %xmm1
; SSE2-NEXT:    movd %xmm1, (%rdi)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc_ssat_v4i32_v4i8_store:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [127,127,127,127]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pandn %xmm1, %xmm2
; SSSE3-NEXT:    por %xmm0, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [4294967168,4294967168,4294967168,4294967168]
; SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    pandn %xmm0, %xmm1
; SSSE3-NEXT:    por %xmm2, %xmm1
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    movd %xmm1, (%rdi)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_ssat_v4i32_v4i8_store:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pminsd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pmaxsd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    packssdw %xmm0, %xmm0
; SSE41-NEXT:    packsswb %xmm0, %xmm0
; SSE41-NEXT:    movd %xmm0, (%rdi)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpminsd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpmovsdb %zmm0, %xmm0
; AVX512F-NEXT:    vmovd %xmm0, (%rdi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsdb %xmm0, (%rdi)
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpmovsdb %zmm0, %xmm0
; AVX512BW-NEXT:    vmovd %xmm0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovsdb %xmm0, (%rdi)
; AVX512BWVL-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v4i32_v4i8_store:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsdb %xmm0, (%rdi)
; SKX-NEXT:    retq
  %1 = icmp slt <4 x i32> %a0, <i32 127, i32 127, i32 127, i32 127>
  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <4 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <4 x i32> %4 to <4 x i8>
  store <4 x i8> %5, <4 x i8> *%p1
  ret void
}

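; Note (descriptive comment only): the icmp/select clamp to [-128,127] followed by trunc
; in the tests below is matched as a signed saturating truncate; per the CHECK lines,
; pre-AVX512 targets lower it with packssdw+packsswb, while the AVX512VL-style runs
; collapse it into a single vpmovsdb.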
define <8 x i8> @trunc_ssat_v8i32_v8i8(<8 x i32> %a0) {
; SSE-LABEL: trunc_ssat_v8i32_v8i8:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v8i32_v8i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v8i32_v8i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_ssat_v8i32_v8i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsdb %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovsdb %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v8i32_v8i8:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsdb %ymm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <8 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <8 x i32> %4 to <8 x i8>
  ret <8 x i8> %5
}

define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
; SSE-LABEL: trunc_ssat_v8i32_v8i8_store:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, (%rdi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsdb %ymm0, (%rdi)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovsdb %ymm0, (%rdi)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v8i32_v8i8_store:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsdb %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <8 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <8 x i32> %4 to <8 x i8>
  store <8 x i8> %5, <8 x i8> *%p1
  ret void
}

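; Note (descriptive comment only): the 512-bit-input tests below take their operand from
; memory and carry "min-legal-vector-width"="256"; presumably this is why the SKX run
; splits the truncate into two 256-bit vpmovsdb ops joined by vpunpcklqdq rather than
; using a single 512-bit vpmovsdb.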
define <16 x i8> @trunc_ssat_v16i32_v16i8(<16 x i32>* %p0) "min-legal-vector-width"="256" {
; SSE-LABEL: trunc_ssat_v16i32_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    movdqa 32(%rdi), %xmm1
; SSE-NEXT:    packssdw 48(%rdi), %xmm1
; SSE-NEXT:    packssdw 16(%rdi), %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v16i32_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
; AVX1-NEXT:    vpackssdw 48(%rdi), %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw 16(%rdi), %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v16i32_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vpackssdw 32(%rdi), %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_ssat_v16i32_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    vpmovsdb %zmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v16i32_v16i8:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa (%rdi), %ymm0
; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
; SKX-NEXT:    vpmovsdb %ymm1, %xmm1
; SKX-NEXT:    vpmovsdb %ymm0, %xmm0
; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %a0 = load <16 x i32>, <16 x i32>* %p0
  %1 = icmp slt <16 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <16 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <16 x i32> %4 to <16 x i8>
  ret <16 x i8> %5
}

define void @trunc_ssat_v16i32_v16i8_store(<16 x i32>* %p0, <16 x i8>* %p1) "min-legal-vector-width"="256" {
; SSE-LABEL: trunc_ssat_v16i32_v16i8_store:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    movdqa 32(%rdi), %xmm1
; SSE-NEXT:    packssdw 48(%rdi), %xmm1
; SSE-NEXT:    packssdw 16(%rdi), %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, (%rsi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v16i32_v16i8_store:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
; AVX1-NEXT:    vpackssdw 48(%rdi), %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw 16(%rdi), %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa %xmm0, (%rsi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v16i32_v16i8_store:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vpackssdw 32(%rdi), %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vmovdqa %xmm0, (%rsi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_ssat_v16i32_v16i8_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    vpmovsdb %zmm0, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v16i32_v16i8_store:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa (%rdi), %ymm0
; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
; SKX-NEXT:    vpmovsdb %ymm1, %xmm1
; SKX-NEXT:    vpmovsdb %ymm0, %xmm0
; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SKX-NEXT:    vmovdqa %xmm0, (%rsi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %a0 = load <16 x i32>, <16 x i32>* %p0
  %1 = icmp slt <16 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <16 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <16 x i32> %4 to <16 x i8>
  store <16 x i8> %5, <16 x i8>* %p1
  ret void
}

define <8 x i8> @trunc_ssat_v8i16_v8i8(<8 x i16> %a0) {
; SSE-LABEL: trunc_ssat_v8i16_v8i8:
; SSE:       # %bb.0:
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc_ssat_v8i16_v8i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc_ssat_v8i16_v8i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v8i16_v8i8:
; SKX:       # %bb.0:
; SKX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; SKX-NEXT:    retq
  %1 = icmp slt <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %3 = icmp sgt <8 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %5 = trunc <8 x i16> %4 to <8 x i8>
  ret <8 x i8> %5
}

define void @trunc_ssat_v8i16_v8i8_store(<8 x i16> %a0, <8 x i8> *%p1) {
; SSE-LABEL: trunc_ssat_v8i16_v8i8_store:
; SSE:       # %bb.0:
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc_ssat_v8i16_v8i8_store:
; AVX:       # %bb.0:
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc_ssat_v8i16_v8i8_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, (%rdi)
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc_ssat_v8i16_v8i8_store:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovq %xmm0, (%rdi)
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc_ssat_v8i16_v8i8_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rdi)
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc_ssat_v8i16_v8i8_store:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovswb %xmm0, (%rdi)
; AVX512BWVL-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v8i16_v8i8_store:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovswb %xmm0, (%rdi)
; SKX-NEXT:    retq
  %1 = icmp slt <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %3 = icmp sgt <8 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %5 = trunc <8 x i16> %4 to <8 x i8>
  store <8 x i8> %5, <8 x i8> *%p1
  ret void
}

define <16 x i8> @trunc_ssat_v16i16_v16i8(<16 x i16> %a0) {
; SSE-LABEL: trunc_ssat_v16i16_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v16i16_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v16i16_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_ssat_v16i16_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc_ssat_v16i16_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc_ssat_v16i16_v16i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc_ssat_v16i16_v16i8:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovswb %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v16i16_v16i8:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovswb %ymm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %1 = icmp slt <16 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %2 = select <16 x i1> %1, <16 x i16> %a0, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %3 = icmp sgt <16 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %5 = trunc <16 x i16> %4 to <16 x i8>
  ret <16 x i8> %5
}

define <32 x i8> @trunc_ssat_v32i16_v32i8(<32 x i16>* %p0) "min-legal-vector-width"="256" {
; SSE-LABEL: trunc_ssat_v32i16_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    movdqa 32(%rdi), %xmm1
; SSE-NEXT:    packsswb 16(%rdi), %xmm0
; SSE-NEXT:    packsswb 48(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v32i16_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
; AVX1-NEXT:    vpacksswb 48(%rdi), %xmm1, %xmm1
; AVX1-NEXT:    vpacksswb 16(%rdi), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v32i16_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vpacksswb 32(%rdi), %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_ssat_v32i16_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vpacksswb 32(%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc_ssat_v32i16_v32i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vpacksswb 32(%rdi), %ymm0, %ymm0
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc_ssat_v32i16_v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc_ssat_v32i16_v32i8:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512BWVL-NEXT:    vpmovswb %zmm0, %ymm0
; AVX512BWVL-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v32i16_v32i8:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa (%rdi), %ymm0
; SKX-NEXT:    vpacksswb 32(%rdi), %ymm0, %ymm0
; SKX-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; SKX-NEXT:    retq
  %a0 = load <32 x i16>, <32 x i16>* %p0
  %1 = icmp slt <32 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %3 = icmp sgt <32 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %5 = trunc <32 x i16> %4 to <32 x i8>
  ret <32 x i8> %5
}

define <32 x i8> @trunc_ssat_v32i32_v32i8(<32 x i32>* %p0) "min-legal-vector-width"="256" {
; SSE-LABEL: trunc_ssat_v32i32_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    movdqa 32(%rdi), %xmm2
; SSE-NEXT:    movdqa 64(%rdi), %xmm1
; SSE-NEXT:    movdqa 96(%rdi), %xmm3
; SSE-NEXT:    packssdw 48(%rdi), %xmm2
; SSE-NEXT:    packssdw 16(%rdi), %xmm0
; SSE-NEXT:    packsswb %xmm2, %xmm0
; SSE-NEXT:    packssdw 112(%rdi), %xmm3
; SSE-NEXT:    packssdw 80(%rdi), %xmm1
; SSE-NEXT:    packsswb %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v32i32_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
; AVX1-NEXT:    vmovdqa 64(%rdi), %xmm2
; AVX1-NEXT:    vmovdqa 96(%rdi), %xmm3
; AVX1-NEXT:    vpackssdw 112(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vpackssdw 80(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpackssdw 48(%rdi), %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw 16(%rdi), %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v32i32_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vmovdqa 64(%rdi), %ymm1
; AVX2-NEXT:    vpackssdw 96(%rdi), %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
; AVX2-NEXT:    vpackssdw 32(%rdi), %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_ssat_v32i32_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    vmovdqa64 64(%rdi), %zmm1
; AVX512-NEXT:    vpmovsdb %zmm0, %xmm0
; AVX512-NEXT:    vpmovsdb %zmm1, %xmm1
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v32i32_v32i8:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa (%rdi), %ymm0
; SKX-NEXT:    vmovdqa 64(%rdi), %ymm1
; SKX-NEXT:    vpackssdw 96(%rdi), %ymm1, %ymm1
; SKX-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
; SKX-NEXT:    vpackssdw 32(%rdi), %ymm0, %ymm0
; SKX-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; SKX-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; SKX-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; SKX-NEXT:    retq
  %a0 = load <32 x i32>, <32 x i32>* %p0
  %1 = icmp slt <32 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <32 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <32 x i32> %4 to <32 x i8>
  ret <32 x i8> %5
}
