; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ANY,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ANY,SSE,SSE4,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=ANY,SSE,SSE4,SSE42
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ANY,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=ANY,AVX,AVX512

; There are at least 3 potential patterns corresponding to an unsigned saturated add: min, cmp with sum, cmp with not.
; Test each of those patterns with i8/i16/i32/i64.
; Test each of those with a constant operand and a variable operand.
; Test each of those with a 128-bit vector type.
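
; For reference, every pattern below is a hand-rolled spelling of the operation
; that the llvm.uadd.sat intrinsic expresses directly. A minimal sketch, kept
; inside this comment so it does not alter the RUN coverage of this file:
;
;   declare i8 @llvm.uadd.sat.i8(i8, i8)
;   define i8 @uadd_sat_ref(i8 %x) {
;     %r = call i8 @llvm.uadd.sat.i8(i8 %x, i8 42) ; clamps to 255 instead of wrapping
;     ret i8 %r
;   }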

define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpb $-43, %dil
; ANY-NEXT:    movl $213, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addb $42, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i8 %x, -43
  %s = select i1 %c, i8 %x, i8 -43
  %r = add i8 %s, 42
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, -43
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpw $-43, %di
; ANY-NEXT:    movl $65493, %eax # imm = 0xFFD5
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i16 %x, -43
  %s = select i1 %c, i16 %x, i16 -43
  %r = add i16 %s, 42
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, -43
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpl $-43, %edi
; ANY-NEXT:    movl $-43, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    retq
  %c = icmp ult i32 %x, -43
  %s = select i1 %c, i32 %x, i32 -43
  %r = add i32 %s, 42
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, -43
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpq $-43, %rdi
; ANY-NEXT:    movq $-43, %rax
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq $42, %rax
; ANY-NEXT:    retq
  %c = icmp ult i64 %x, -43
  %s = select i1 %c, i64 %x, i64 -43
  %r = add i64 %s, 42
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, -43
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}
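
; The same three patterns with a variable amount %y. The min form works because
; ~y is the largest value that can be added to y without wrapping: for unsigned
; arithmetic, UMAX - y == ~y, so umin(x, ~y) + y never overflows. In the scalar
; sum forms, codegen detects the wrap via the carry flag and uses cmov to keep
; the all-ones clamp unless the add stayed in range.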

define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notb %al
; ANY-NEXT:    cmpb %al, %dil
; ANY-NEXT:    movzbl %al, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addb %sil, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %c = icmp ult i8 %x, %noty
  %s = select i1 %c, i8 %x, i8 %noty
  %r = add i8 %s, %y
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %sil, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %dil, %sil
; ANY-NEXT:    movzbl %sil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %noty
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    # kill: def $esi killed $esi def $rsi
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpw %ax, %di
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl %esi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %c = icmp ult i16 %x, %noty
  %s = select i1 %c, i16 %x, i16 %noty
  %r = add i16 %s, %y
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %si, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %di, %si
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %esi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %noty
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_variable_i32_using_min(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    # kill: def $esi killed $esi def $rsi
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpl %eax, %edi
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl %esi, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %c = icmp ult i32 %x, %noty
  %s = select i1 %c, i32 %x, i32 %noty
  %r = add i32 %s, %y
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %noty
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movq %rsi, %rax
; ANY-NEXT:    notq %rax
; ANY-NEXT:    cmpq %rax, %rdi
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq %rsi, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %c = icmp ult i64 %x, %noty
  %s = select i1 %c, i64 %x, i64 %noty
  %r = add i64 %s, %y
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %noty
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}
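
; 128-bit vector versions of the same patterns. x86 has saturating adds for the
; byte/word element types (paddusb/paddusw), so the i8/i16 cases can become a
; single instruction; the i32/i64 element types have no direct equivalent and
; must be synthesized from compare/min sequences.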

define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_min:
; SSE:       # %bb.0:
; SSE-NEXT:    pminub {{.*}}(%rip), %xmm0
; SSE-NEXT:    paddb {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_min:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminub {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = add <16 x i8> %s, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
; SSE2-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psubusw {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubw %xmm1, %xmm0
; SSE2-NEXT:    paddw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminuw {{.*}}(%rip), %xmm0
; SSE4-NEXT:    paddw {{.*}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_min:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = add <8 x i16> %s, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; SSE-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
; SSE-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}
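
; Note how the pre-SSE4 lowerings emulate unsigned min. SSE2 has no pminuw, so
; the v8i16 min above is built from saturating subtraction
; (x - usubsat(x, m) == umin(x, m)); for 32-bit elements there is no unsigned
; compare at all, so the v4i32 forms below flip the sign bit with pxor and use
; the signed pcmpgtd instead.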

define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483605,2147483605,2147483605,2147483605]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{.*}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = add <4 x i32> %s, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{.*}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{.*}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}
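
; The notval pattern does not require splat constants: it still matches as long
; as each compare constant is the bitwise-not of the corresponding add constant
; (here ~43 == -44, ~44 == -45, and so on).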

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [43,44,45,46]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{.*}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <4 x i32> %x, <i32 43, i32 44, i32 45, i32 46>
  %c = icmp ugt <4 x i32> %x, <i32 -44, i32 -45, i32 -46, i32 -47>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}
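
; v2i64 is the worst case: there is no 64-bit unsigned vector compare or min
; before AVX512 (vpminuq). SSE4.2 can at least use pcmpgtq after biasing both
; sides by the sign bit; plain SSE2/SSE4.1 must assemble the 64-bit unsigned
; compare from 32-bit pcmpgtd/pcmpeqd pieces.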

define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddq {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [18446744073709551573,18446744073709551573]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372034707292117,9223372034707292117]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    paddq {{.*}}(%rip), %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    movapd {{.*#+}} xmm2 = [18446744073709551573,18446744073709551573]
; SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm0, %xmm3
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775765,9223372036854775765]
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE42-NEXT:    paddq {{.*}}(%rip), %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovapd {{.*#+}} xmm1 = [18446744073709551573,18446744073709551573]
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765]
; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %c = icmp ult <2 x i64> %x, <i64 -43, i64 -43>
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> <i64 -43, i64 -43>
  %r = add <2 x i64> %s, <i64 42, i64 42>
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    pxor %xmm2, %xmm1
; SSE42-NEXT:    paddq {{.*}}(%rip), %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    por %xmm0, %xmm1
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}
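
; As the AVX512 lines above and below show, once a real unsigned 64-bit min
; exists, all of these forms converge to the same vpminuq+vpaddq sequence.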

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    pxor %xmm2, %xmm1
; SSE42-NEXT:    paddq {{.*}}(%rip), %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    por %xmm0, %xmm1
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}
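
; Vector versions with a variable operand %y. Inverting %y now takes extra
; instructions: SSE/AVX2 materialize all-ones with pcmpeqd and xor with it,
; while AVX512 folds the NOT into a single vpternlogq $15 (truth table 0x0f,
; i.e. bitwise NOT).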

define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_min:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pxor %xmm1, %xmm2
; SSE-NEXT:    pminub %xmm2, %xmm0
; SSE-NEXT:    paddb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v16i8_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v16i8_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %c = icmp ult <16 x i8> %x, %noty
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %noty
  %r = add <16 x i8> %s, %y
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    paddb %xmm1, %xmm3
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    pminub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    por %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminub %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $222, %xmm2, %xmm3, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %noty
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    psubusw %xmm2, %xmm3
; SSE2-NEXT:    psubw %xmm3, %xmm0
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminuw %xmm2, %xmm0
; SSE4-NEXT:    paddw %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v8i16_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v8i16_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %c = icmp ult <8 x i16> %x, %noty
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %noty
  %r = add <8 x i16> %s, %y
  ret <8 x i16> %r
}
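
; The sum-plus-overflow-compare form is the friendliest one for vectors: with a
; variable operand it maps directly onto the hardware saturating add, as the
; single paddusw below (and paddusb above) shows.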

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddw %xmm1, %xmm2
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    paddw %xmm1, %xmm3
; SSE4-NEXT:    pxor %xmm2, %xmm1
; SSE4-NEXT:    pminuw %xmm0, %xmm1
; SSE4-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE4-NEXT:    pxor %xmm2, %xmm0
; SSE4-NEXT:    por %xmm3, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminuw %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $222, %xmm2, %xmm3, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %noty
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483647,2147483647,2147483647,2147483647]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminud %xmm2, %xmm0
; SSE4-NEXT:    paddd %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = icmp ult <4 x i32> %x, %noty
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %noty
  %r = add <4 x i32> %s, %y
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminud %xmm2, %xmm0
; SSE4-NEXT:    paddd %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}
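
; With AVX512VL, the compare below produces a mask register directly
; (vpcmpnleud %k1), and the saturation constant is blended in with a masked
; move instead of the xor/por sequence the other subtargets need.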

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddd %xmm1, %xmm2
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    paddd %xmm1, %xmm3
; SSE4-NEXT:    pxor %xmm2, %xmm1
; SSE4-NEXT:    pminud %xmm0, %xmm1
; SSE4-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE4-NEXT:    pxor %xmm2, %xmm0
; SSE4-NEXT:    por %xmm3, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminud %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpnleud %xmm1, %xmm0, %k1
; AVX512-NEXT:    vmovdqa32 %xmm3, %xmm2 {%k1}
; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %noty
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddq %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE41-NEXT:    pxor %xmm1, %xmm4
; SSE41-NEXT:    movdqa %xmm4, %xmm5
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
; SSE41-NEXT:    pand %xmm5, %xmm0
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    paddq %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE42-NEXT:    pxor %xmm1, %xmm3
; SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm0, %xmm4
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775807,9223372036854775807]
; SSE42-NEXT:    pxor %xmm1, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE42-NEXT:    paddq %xmm1, %xmm3
; SSE42-NEXT:    movdqa %xmm3, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm3
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm1, %xmm4
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %c = icmp ult <2 x i64> %x, %noty
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %noty
  %r = add <2 x i64> %s, %y
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    paddq %xmm0, %xmm1
; SSE42-NEXT:    pxor %xmm2, %xmm0
; SSE42-NEXT:    pxor %xmm1, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    por %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddq %xmm1, %xmm2
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    paddq %xmm1, %xmm2
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    paddq %xmm1, %xmm2
; SSE42-NEXT:    pxor {{.*}}(%rip), %xmm1
; SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k1
; AVX512-NEXT:    vmovdqa64 %xmm3, %xmm2 {%k1}
; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %noty
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}