; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2   | FileCheck %s --check-prefixes=ANY,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ANY,SSE,SSE4,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=ANY,SSE,SSE4,SSE42
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2   | FileCheck %s --check-prefixes=ANY,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=ANY,AVX,AVX512

; There are at least 3 potential patterns corresponding to an unsigned saturating add: min, cmp with sum, cmp with notval.
; Test each of those patterns with i8/i16/i32/i64.
; Test each of those with a constant operand and a variable operand.
; Test each of those with a 128-bit vector type.

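; In C terms, for the constant case (addend 42, so the clamp limit is
; ~42 == UMAX - 42 == -43), the three shapes are roughly (illustrative
; sketch only, not part of the checked output; UMAX is the all-ones value):
;   min:    r = (x < ~42 ? x : ~42) + 42;         // clamp first, then add
;   sum:    a = x + 42; r = (a < x ? UMAX : a);   // wrap detected via the sum
;   notval: a = x + 42; r = (x > ~42 ? UMAX : a); // wrap detected via ~42
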
define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpb $-43, %dil
; ANY-NEXT:    movl $213, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addb $42, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i8 %x, -43
  %s = select i1 %c, i8 %x, i8 -43
  %r = add i8 %s, 42
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, -43
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpw $-43, %di
; ANY-NEXT:    movl $65493, %eax # imm = 0xFFD5
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i16 %x, -43
  %s = select i1 %c, i16 %x, i16 -43
  %r = add i16 %s, 42
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, -43
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpl $-43, %edi
; ANY-NEXT:    movl $-43, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    retq
  %c = icmp ult i32 %x, -43
  %s = select i1 %c, i32 %x, i32 -43
  %r = add i32 %s, 42
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, -43
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpq $-43, %rdi
; ANY-NEXT:    movq $-43, %rax
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq $42, %rax
; ANY-NEXT:    retq
  %c = icmp ult i64 %x, -43
  %s = select i1 %c, i64 %x, i64 -43
  %r = add i64 %s, 42
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, -43
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

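; The same three patterns with a variable operand %y. These rely on the
; identity that x + y wraps (unsigned) exactly when x > ~y, since ~y == UMAX - y.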
define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notb %al
; ANY-NEXT:    cmpb %al, %dil
; ANY-NEXT:    movzbl %al, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addb %sil, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %c = icmp ult i8 %x, %noty
  %s = select i1 %c, i8 %x, i8 %noty
  %r = add i8 %s, %y
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %sil, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %dil, %sil
; ANY-NEXT:    movzbl %sil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %noty
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    # kill: def $esi killed $esi def $rsi
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpw %ax, %di
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl %esi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %c = icmp ult i16 %x, %noty
  %s = select i1 %c, i16 %x, i16 %noty
  %r = add i16 %s, %y
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %si, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %di, %si
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %esi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %noty
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_variable_i32_using_min(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    # kill: def $esi killed $esi def $rsi
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpl %eax, %edi
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl %esi, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %c = icmp ult i32 %x, %noty
  %s = select i1 %c, i32 %x, i32 %noty
  %r = add i32 %s, %y
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %noty
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movq %rsi, %rax
; ANY-NEXT:    notq %rax
; ANY-NEXT:    cmpq %rax, %rdi
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq %rsi, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %c = icmp ult i64 %x, %noty
  %s = select i1 %c, i64 %x, i64 %noty
  %r = add i64 %s, %y
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %noty
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

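; 128-bit vector versions of the same patterns. i8/i16 elements can lower
; directly to the saturating-add instructions (paddusb/paddusw); i32/i64
; elements have no such instruction, so codegen uses unsigned min + add
; (pminud, or pminuq with AVX512) or sign-bit-flipped compare sequences.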
define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_min:
; SSE:       # %bb.0:
; SSE-NEXT:    pminub {{.*}}(%rip), %xmm0
; SSE-NEXT:    paddb {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_min:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminub {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = add <16 x i8> %s, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
; SSE2-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psubusw {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubw %xmm1, %xmm0
; SSE2-NEXT:    paddw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminuw {{.*}}(%rip), %xmm0
; SSE4-NEXT:    paddw {{.*}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_min:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = add <8 x i16> %s, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; SSE-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
; SSE-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483605,2147483605,2147483605,2147483605]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{.*}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = add <4 x i32> %s, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{.*}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{.*}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

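; The non-splat constants below still match because every lane satisfies
; limit == ~addend (for example, ~43 == -44 in lane 0).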
define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [43,44,45,46]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{.*}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <4 x i32> %x, <i32 43, i32 44, i32 45, i32 46>
  %c = icmp ugt <4 x i32> %x, <i32 -44, i32 -45, i32 -46, i32 -47>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddq {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [18446744073709551573,18446744073709551573]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372034707292117,9223372034707292117]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    paddq {{.*}}(%rip), %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    movapd {{.*#+}} xmm2 = [18446744073709551573,18446744073709551573]
; SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm0, %xmm3
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775765,9223372036854775765]
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE42-NEXT:    paddq {{.*}}(%rip), %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovapd {{.*#+}} xmm1 = [18446744073709551573,18446744073709551573]
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765]
; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %c = icmp ult <2 x i64> %x, <i64 -43, i64 -43>
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> <i64 -43, i64 -43>
  %r = add <2 x i64> %s, <i64 42, i64 42>
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    pxor %xmm2, %xmm1
; SSE42-NEXT:    paddq {{.*}}(%rip), %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    por %xmm0, %xmm1
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    pxor %xmm2, %xmm1
; SSE42-NEXT:    paddq {{.*}}(%rip), %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    por %xmm0, %xmm1
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_min:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pxor %xmm1, %xmm2
; SSE-NEXT:    pminub %xmm2, %xmm0
; SSE-NEXT:    paddb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v16i8_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v16i8_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %c = icmp ult <16 x i8> %x, %noty
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %noty
  %r = add <16 x i8> %s, %y
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    paddb %xmm1, %xmm3
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    pminub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    por %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminub %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $222, %xmm2, %xmm3, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %noty
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    psubusw %xmm2, %xmm3
; SSE2-NEXT:    psubw %xmm3, %xmm0
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminuw %xmm2, %xmm0
; SSE4-NEXT:    paddw %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v8i16_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v8i16_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %c = icmp ult <8 x i16> %x, %noty
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %noty
  %r = add <8 x i16> %s, %y
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddw %xmm1, %xmm2
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    paddw %xmm1, %xmm3
; SSE4-NEXT:    pxor %xmm2, %xmm1
; SSE4-NEXT:    pminuw %xmm0, %xmm1
; SSE4-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE4-NEXT:    pxor %xmm2, %xmm0
; SSE4-NEXT:    por %xmm3, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminuw %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $222, %xmm2, %xmm3, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %noty
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483647,2147483647,2147483647,2147483647]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminud %xmm2, %xmm0
; SSE4-NEXT:    paddd %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = icmp ult <4 x i32> %x, %noty
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %noty
  %r = add <4 x i32> %s, %y
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminud %xmm2, %xmm0
; SSE4-NEXT:    paddd %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddd %xmm1, %xmm2
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    paddd %xmm1, %xmm3
; SSE4-NEXT:    pxor %xmm2, %xmm1
; SSE4-NEXT:    pminud %xmm0, %xmm1
; SSE4-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE4-NEXT:    pxor %xmm2, %xmm0
; SSE4-NEXT:    por %xmm3, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminud %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpnleud %xmm1, %xmm0, %k1
; AVX512-NEXT:    vmovdqa32 %xmm3, %xmm2 {%k1}
; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %noty
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddq %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE41-NEXT:    pxor %xmm1, %xmm4
; SSE41-NEXT:    movdqa %xmm4, %xmm5
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
; SSE41-NEXT:    pand %xmm5, %xmm0
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    paddq %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE42-NEXT:    pxor %xmm1, %xmm3
; SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm0, %xmm4
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775807,9223372036854775807]
; SSE42-NEXT:    pxor %xmm1, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE42-NEXT:    paddq %xmm1, %xmm3
; SSE42-NEXT:    movdqa %xmm3, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm3
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm1, %xmm4
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %c = icmp ult <2 x i64> %x, %noty
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %noty
  %r = add <2 x i64> %s, %y
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    paddq %xmm0, %xmm1
; SSE42-NEXT:    pxor %xmm2, %xmm0
; SSE42-NEXT:    pxor %xmm1, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    por %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddq %xmm1, %xmm2
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    paddq %xmm1, %xmm2
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    paddq %xmm1, %xmm2
; SSE42-NEXT:    pxor {{.*}}(%rip), %xmm1
; SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k1
; AVX512-NEXT:    vmovdqa64 %xmm3, %xmm2 {%k1}
; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %noty
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}