; Test 64-bit subtraction in which the second operand is variable.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

declare i64 @foo()

; Register-register form (SLGR): the difference is stored through %res and
; the overflow bit is returned.
define zeroext i1 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%res) {
; CHECK-LABEL: f1:
; CHECK: slgr %r3, %r4
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %sub = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
  %diff = extractvalue {i64, i1} %sub, 0
  %borrow = extractvalue {i64, i1} %sub, 1
  store i64 %diff, i64 *%res
  ret i1 %borrow
}

; Branch on the overflow bit: @foo is tail-called when overflow is reported.
define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%res) {
; CHECK-LABEL: f2:
; CHECK: slgr %r3, %r4
; CHECK: stg %r3, 0(%r5)
; CHECK: jgle foo@PLT
; CHECK: br %r14
  %sub = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
  %diff = extractvalue {i64, i1} %sub, 0
  %borrow = extractvalue {i64, i1} %sub, 1
  store i64 %diff, i64 *%res
  br i1 %borrow, label %call, label %exit

call:
  tail call i64 @foo()
  br label %exit

exit:
  ret void
}

; ... and the inverse: @foo is tail-called when no overflow is reported.
define void @f3(i64 %dummy, i64 %a, i64 %b, i64 *%res) {
; CHECK-LABEL: f3:
; CHECK: slgr %r3, %r4
; CHECK: stg %r3, 0(%r5)
; CHECK: jgnle foo@PLT
; CHECK: br %r14
  %sub = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
  %diff = extractvalue {i64, i1} %sub, 0
  %borrow = extractvalue {i64, i1} %sub, 1
  store i64 %diff, i64 *%res
  br i1 %borrow, label %exit, label %call

call:
  tail call i64 @foo()
  br label %exit

exit:
  ret void
}

; Check SLG with no displacement.
define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%res) {
; CHECK-LABEL: f4:
; CHECK: slg %r3, 0(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; The subtrahend is loaded directly from %src, with no offset applied.
  %rhs = load i64, i64 *%src
  %sub = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %rhs)
  %diff = extractvalue {i64, i1} %sub, 0
  %borrow = extractvalue {i64, i1} %sub, 1
  store i64 %diff, i64 *%res
  ret i1 %borrow
}

; Check the high end of the aligned SLG range.
define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%res) {
; CHECK-LABEL: f5:
; CHECK: slg %r3, 524280(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; 65535 doublewords past %src is a byte offset of 524280.
  %addr = getelementptr i64, i64 *%src, i64 65535
  %rhs = load i64, i64 *%addr
  %sub = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %rhs)
  %diff = extractvalue {i64, i1} %sub, 0
  %borrow = extractvalue {i64, i1} %sub, 1
  store i64 %diff, i64 *%res
  ret i1 %borrow
}

; Check the next doubleword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%src, i64 *%res) {
; CHECK-LABEL: f6:
; CHECK: agfi %r4, 524288
; CHECK: slg %r3, 0(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; 65536 doublewords past %src is a byte offset of 524288.
  %addr = getelementptr i64, i64 *%src, i64 65536
  %rhs = load i64, i64 *%addr
  %sub = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %rhs)
  %diff = extractvalue {i64, i1} %sub, 0
  %borrow = extractvalue {i64, i1} %sub, 1
  store i64 %diff, i64 *%res
  ret i1 %borrow
}

; Check the high end of the negative aligned SLG range.
define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%src, i64 *%res) {
; CHECK-LABEL: f7:
; CHECK: slg %r3, -8(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; One doubleword before %src is a byte offset of -8.
  %addr = getelementptr i64, i64 *%src, i64 -1
  %rhs = load i64, i64 *%addr
  %sub = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %rhs)
  %diff = extractvalue {i64, i1} %sub, 0
  %borrow = extractvalue {i64, i1} %sub, 1
  store i64 %diff, i64 *%res
  ret i1 %borrow
}

; Check the low end of the SLG range.
define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%src, i64 *%res) {
; CHECK-LABEL: f8:
; CHECK: slg %r3, -524288(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; 65536 doublewords before %src is a byte offset of -524288.
  %addr = getelementptr i64, i64 *%src, i64 -65536
  %rhs = load i64, i64 *%addr
  %sub = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %rhs)
  %diff = extractvalue {i64, i1} %sub, 0
  %borrow = extractvalue {i64, i1} %sub, 1
  store i64 %diff, i64 *%res
  ret i1 %borrow
}

; Check the next doubleword down, which needs separate address logic.
; Other sequences besides this one would be OK.
define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%src, i64 *%res) {
; CHECK-LABEL: f9:
; CHECK: agfi %r4, -524296
; CHECK: slg %r3, 0(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; 65537 doublewords before %src is a byte offset of -524296.
  %addr = getelementptr i64, i64 *%src, i64 -65537
  %rhs = load i64, i64 *%addr
  %sub = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %rhs)
  %diff = extractvalue {i64, i1} %sub, 0
  %borrow = extractvalue {i64, i1} %sub, 1
  store i64 %diff, i64 *%res
  ret i1 %borrow
}

; Check that SLG allows an index.
define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) {
; CHECK-LABEL: f10:
; CHECK: slg %r4, 524280({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: stg %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; The address is formed as base + index + constant offset in integer
  ; arithmetic and only then converted to a pointer.
  %sum = add i64 %src, %index
  %addrval = add i64 %sum, 524280
  %addr = inttoptr i64 %addrval to i64 *
  %rhs = load i64, i64 *%addr
  %sub = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %rhs)
  %diff = extractvalue {i64, i1} %sub, 0
  %borrow = extractvalue {i64, i1} %sub, 1
  store i64 %diff, i64 *%res
  ret i1 %borrow
}

; Check that subtractions of spilled values can use SLG rather than SLGR.
; Ten values are loaded before the call to @foo and subtracted from its
; result afterwards, so all of them are live across the call; the expected
; output subtracts at least one of them straight from a stack slot.
define zeroext i1 @f11(i64 *%ptr0) {
; CHECK-LABEL: f11:
; CHECK: brasl %r14, foo@PLT
; CHECK: slg %r2, 160(%r15)
; CHECK: br %r14
  %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
  %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
  %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
  %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
  %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
  %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
  %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
  %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
  %ptr9 = getelementptr i64, i64 *%ptr0, i64 18

  %val0 = load i64, i64 *%ptr0
  %val1 = load i64, i64 *%ptr1
  %val2 = load i64, i64 *%ptr2
  %val3 = load i64, i64 *%ptr3
  %val4 = load i64, i64 *%ptr4
  %val5 = load i64, i64 *%ptr5
  %val6 = load i64, i64 *%ptr6
  %val7 = load i64, i64 *%ptr7
  %val8 = load i64, i64 *%ptr8
  %val9 = load i64, i64 *%ptr9

  %ret = call i64 @foo()

  ; Chain of subtractions; the overflow bits are OR-ed together as we go.
  %sub0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %ret, i64 %val0)
  %diff0 = extractvalue {i64, i1} %sub0, 0
  %borrow0 = extractvalue {i64, i1} %sub0, 1
  %sub1 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %diff0, i64 %val1)
  %diff1 = extractvalue {i64, i1} %sub1, 0
  %borrow1 = extractvalue {i64, i1} %sub1, 1
  %any1 = or i1 %borrow0, %borrow1
  %sub2 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %diff1, i64 %val2)
  %diff2 = extractvalue {i64, i1} %sub2, 0
  %borrow2 = extractvalue {i64, i1} %sub2, 1
  %any2 = or i1 %any1, %borrow2
  %sub3 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %diff2, i64 %val3)
  %diff3 = extractvalue {i64, i1} %sub3, 0
  %borrow3 = extractvalue {i64, i1} %sub3, 1
  %any3 = or i1 %any2, %borrow3
  %sub4 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %diff3, i64 %val4)
  %diff4 = extractvalue {i64, i1} %sub4, 0
  %borrow4 = extractvalue {i64, i1} %sub4, 1
  %any4 = or i1 %any3, %borrow4
  %sub5 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %diff4, i64 %val5)
  %diff5 = extractvalue {i64, i1} %sub5, 0
  %borrow5 = extractvalue {i64, i1} %sub5, 1
  %any5 = or i1 %any4, %borrow5
  %sub6 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %diff5, i64 %val6)
  %diff6 = extractvalue {i64, i1} %sub6, 0
  %borrow6 = extractvalue {i64, i1} %sub6, 1
  %any6 = or i1 %any5, %borrow6
  %sub7 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %diff6, i64 %val7)
  %diff7 = extractvalue {i64, i1} %sub7, 0
  %borrow7 = extractvalue {i64, i1} %sub7, 1
  %any7 = or i1 %any6, %borrow7
  %sub8 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %diff7, i64 %val8)
  %diff8 = extractvalue {i64, i1} %sub8, 0
  %borrow8 = extractvalue {i64, i1} %sub8, 1
  %any8 = or i1 %any7, %borrow8
  %sub9 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %diff8, i64 %val9)
  %diff9 = extractvalue {i64, i1} %sub9, 0
  %borrow9 = extractvalue {i64, i1} %sub9, 1
  %any9 = or i1 %any8, %borrow9

  ret i1 %any9
}

declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
