; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone

declare {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; fold (ssub x, 0) -> x
define i32 @combine_ssub_zero(i32 %a0, i32 %a1) {
; SSE-LABEL: combine_ssub_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_ssub_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    retq
  %1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a0, i32 zeroinitializer)
  %2 = extractvalue {i32, i1} %1, 0
  %3 = extractvalue {i32, i1} %1, 1
  %4 = select i1 %3, i32 %a1, i32 %2
  ret i32 %4
}

define <4 x i32> @combine_vec_ssub_zero(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_ssub_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_ssub_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = call {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
  %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
  %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
  %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
  ret <4 x i32> %4
}

; fold (usub x, 0) -> x
define i32 @combine_usub_zero(i32 %a0, i32 %a1) {
; SSE-LABEL: combine_usub_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_usub_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    retq
  %1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a0, i32 zeroinitializer)
  %2 = extractvalue {i32, i1} %1, 0
  %3 = extractvalue {i32, i1} %1, 1
  %4 = select i1 %3, i32 %a1, i32 %2
  ret i32 %4
}

define <4 x i32> @combine_vec_usub_zero(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_usub_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_usub_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
  %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
  %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
  %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
  ret <4 x i32> %4
}

; fold (ssub x, x) -> 0
define i32 @combine_ssub_self(i32 %a0, i32 %a1) {
; SSE-LABEL: combine_ssub_self:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_ssub_self:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    retq
  %1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a0, i32 %a0)
  %2 = extractvalue {i32, i1} %1, 0
  %3 = extractvalue {i32, i1} %1, 1
  %4 = select i1 %3, i32 %a1, i32 %2
  ret i32 %4
}

define <4 x i32> @combine_vec_ssub_self(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_ssub_self:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_ssub_self:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> %a0)
  %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
  %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
  %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
  ret <4 x i32> %4
}

; fold (usub x, x) -> 0
define i32 @combine_usub_self(i32 %a0, i32 %a1) {
; SSE-LABEL: combine_usub_self:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_usub_self:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    retq
  %1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a0, i32 %a0)
  %2 = extractvalue {i32, i1} %1, 0
  %3 = extractvalue {i32, i1} %1, 1
  %4 = select i1 %3, i32 %a1, i32 %2
  ret i32 %4
}

define <4 x i32> @combine_vec_usub_self(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_usub_self:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_usub_self:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> %a0)
  %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
  %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
  %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
  ret <4 x i32> %4
}

; fold (usub -1, x) -> (xor x, -1) + no borrow
define i32 @combine_usub_negone(i32 %a0, i32 %a1) {
; SSE-LABEL: combine_usub_negone:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    notl %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_usub_negone:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    notl %eax
; AVX-NEXT:    retq
  %1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 -1, i32 %a0)
  %2 = extractvalue {i32, i1} %1, 0
  %3 = extractvalue {i32, i1} %1, 1
  %4 = select i1 %3, i32 %a1, i32 %2
  ret i32 %4
}

define <4 x i32> @combine_vec_usub_negone(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_usub_negone:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_usub_negone:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a0)
  %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
  %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
  %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
  ret <4 x i32> %4
}