; Test the handling of base + 12-bit displacement addresses for large frames,
; in cases where no 20-bit form exists. The tests here assume z10 register
; pressure, without the high words being available.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
; RUN:   FileCheck -check-prefix=CHECK-NOFP %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \
; RUN:   FileCheck -check-prefix=CHECK-FP %s

; This file tests what happens when a displacement is converted from
; being relative to the start of a frame object to being relative to
; the frame itself. In some cases the test is only possible if two
; objects are allocated.
;
; Rather than rely on a particular order for those objects, the tests
; instead allocate two objects of the same size and apply the test to
; both of them. For consistency, all tests follow this model, even if
; one object would actually be enough.

; First check the highest in-range offset after conversion, which is 4092
; for word-addressing instructions like MVHI.
;
; The last in-range doubleword offset is 4088. Since the frame has two
; emergency spill slots at 160(%r15), the amount that we need to allocate
; in order to put another object at offset 4088 is (4088 - 176) / 4 = 978
; words.
define void @f1() {
; CHECK-NOFP-LABEL: f1:
; CHECK-NOFP: mvhi 4092(%r15), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f1:
; CHECK-FP: mvhi 4092(%r11), 42
; CHECK-FP: br %r14
;
; Element 1 (byte 4) of each object is written, so whichever object ends up
; at frame offset 4088 is accessed at 4088 + 4 = 4092.
  %region1 = alloca [978 x i32], align 8
  %region2 = alloca [978 x i32], align 8
  %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 1
  %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 1
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Test the first out-of-range offset. We cannot use an index register here.
define void @f2() {
; CHECK-NOFP-LABEL: f2:
; CHECK-NOFP: lay %r1, 4096(%r15)
; CHECK-NOFP: mvhi 0(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f2:
; CHECK-FP: lay %r1, 4096(%r11)
; CHECK-FP: mvhi 0(%r1), 42
; CHECK-FP: br %r14
;
; Same 978-word objects as f1, but element 2 (byte 8) is written, giving a
; frame offset of 4088 + 8 = 4096 for the object at offset 4088.
  %region1 = alloca [978 x i32], align 8
  %region2 = alloca [978 x i32], align 8
  %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 2
  %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 2
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Test the next offset after that.
define void @f3() {
; CHECK-NOFP-LABEL: f3:
; CHECK-NOFP: lay %r1, 4096(%r15)
; CHECK-NOFP: mvhi 4(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f3:
; CHECK-FP: lay %r1, 4096(%r11)
; CHECK-FP: mvhi 4(%r1), 42
; CHECK-FP: br %r14
;
; Element 3 (byte 12): 4088 + 12 = 4100 = 4096 + 4.
  %region1 = alloca [978 x i32], align 8
  %region2 = alloca [978 x i32], align 8
  %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 3
  %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 3
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Add 4096 bytes (1024 words) to the size of each object and repeat.
define void @f4() {
; CHECK-NOFP-LABEL: f4:
; CHECK-NOFP: lay %r1, 4096(%r15)
; CHECK-NOFP: mvhi 4092(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f4:
; CHECK-FP: lay %r1, 4096(%r11)
; CHECK-FP: mvhi 4092(%r1), 42
; CHECK-FP: br %r14
;
; 978 + 1024 = 2002 words per object; element 1 then lands at
; 4092 + 4096 = 8188 = 4096 + 4092.
  %region1 = alloca [2002 x i32], align 8
  %region2 = alloca [2002 x i32], align 8
  %ptr1 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region1, i64 0, i64 1
  %ptr2 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region2, i64 0, i64 1
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; ...as above.
define void @f5() {
; CHECK-NOFP-LABEL: f5:
; CHECK-NOFP: lay %r1, 8192(%r15)
; CHECK-NOFP: mvhi 0(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f5:
; CHECK-FP: lay %r1, 8192(%r11)
; CHECK-FP: mvhi 0(%r1), 42
; CHECK-FP: br %r14
;
; 2002-word objects with element 2 (byte 8) written: the f2 offset plus
; 4096, i.e. 4096 + 4096 = 8192.
  %region1 = alloca [2002 x i32], align 8
  %region2 = alloca [2002 x i32], align 8
  %ptr1 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region1, i64 0, i64 2
  %ptr2 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region2, i64 0, i64 2
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; ...as above.
define void @f6() {
; CHECK-NOFP-LABEL: f6:
; CHECK-NOFP: lay %r1, 8192(%r15)
; CHECK-NOFP: mvhi 4(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f6:
; CHECK-FP: lay %r1, 8192(%r11)
; CHECK-FP: mvhi 4(%r1), 42
; CHECK-FP: br %r14
;
; Element 3 (byte 12): the f3 offset plus 4096, i.e. 4100 + 4096 = 8192 + 4.
  %region1 = alloca [2002 x i32], align 8
  %region2 = alloca [2002 x i32], align 8
  %ptr1 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region1, i64 0, i64 3
  %ptr2 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region2, i64 0, i64 3
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Now try an offset of 4092 from the start of the object, with the object
; being at offset 8192. This time we need objects of (8192 - 176) / 4 = 2004
; words.
define void @f7() {
; CHECK-NOFP-LABEL: f7:
; CHECK-NOFP: lay %r1, 8192(%r15)
; CHECK-NOFP: mvhi 4092(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f7:
; CHECK-FP: lay %r1, 8192(%r11)
; CHECK-FP: mvhi 4092(%r1), 42
; CHECK-FP: br %r14
;
; Element 1023 is at byte 1023 * 4 = 4092 within each 2004-word object.
  %region1 = alloca [2004 x i32], align 8
  %region2 = alloca [2004 x i32], align 8
  %ptr1 = getelementptr inbounds [2004 x i32], [2004 x i32]* %region1, i64 0, i64 1023
  %ptr2 = getelementptr inbounds [2004 x i32], [2004 x i32]* %region2, i64 0, i64 1023
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Keep the object-relative offset the same but bump the size of the
; objects by one doubleword.
define void @f8() {
; CHECK-NOFP-LABEL: f8:
; CHECK-NOFP: lay %r1, 12288(%r15)
; CHECK-NOFP: mvhi 4(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f8:
; CHECK-FP: lay %r1, 12288(%r11)
; CHECK-FP: mvhi 4(%r1), 42
; CHECK-FP: br %r14
;
; 2006 words = 8024 bytes, so the object sits at 176 + 8024 = 8200 and
; element 1023 is at 8200 + 4092 = 12292 = 12288 + 4.
  %region1 = alloca [2006 x i32], align 8
  %region2 = alloca [2006 x i32], align 8
  %ptr1 = getelementptr inbounds [2006 x i32], [2006 x i32]* %region1, i64 0, i64 1023
  %ptr2 = getelementptr inbounds [2006 x i32], [2006 x i32]* %region2, i64 0, i64 1023
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Check a case where the original displacement is out of range. The backend
; should force STY to be used instead.
define void @f9() {
; CHECK-NOFP-LABEL: f9:
; CHECK-NOFP: lhi [[TMP:%r[0-5]]], 42
; CHECK-NOFP: sty [[TMP]], 12296(%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f9:
; CHECK-FP: lhi [[TMP:%r[0-5]]], 42
; CHECK-FP: sty [[TMP]], 12296(%r11)
; CHECK-FP: br %r14
;
; Element 1024 is at byte 4096 within the object, which already exceeds the
; 12-bit displacement range; 8200 + 4096 = 12296 overall.
  %region1 = alloca [2006 x i32], align 8
  %region2 = alloca [2006 x i32], align 8
  %ptr1 = getelementptr inbounds [2006 x i32], [2006 x i32]* %region1, i64 0, i64 1024
  %ptr2 = getelementptr inbounds [2006 x i32], [2006 x i32]* %region2, i64 0, i64 1024
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Repeat f2 in a case that needs the emergency spill slots (because all
; call-clobbered registers are live and no call-saved ones have been
; allocated).
define void @f10(i32 *%vptr) {
; CHECK-NOFP-LABEL: f10:
; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
; CHECK-NOFP: lay [[REGISTER]], 4096(%r15)
; CHECK-NOFP: mvhi 0([[REGISTER]]), 42
; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f10:
; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
; CHECK-FP: lay [[REGISTER]], 4096(%r11)
; CHECK-FP: mvhi 0([[REGISTER]]), 42
; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
; CHECK-FP: br %r14
;
; The five values loaded here are only stored back after the two stores of
; 42, so all of them stay live across the out-of-range accesses.
  %i0 = load volatile i32, i32 *%vptr
  %i1 = load volatile i32, i32 *%vptr
  %i3 = load volatile i32, i32 *%vptr
  %i4 = load volatile i32, i32 *%vptr
  %i5 = load volatile i32, i32 *%vptr
  %region1 = alloca [978 x i32], align 8
  %region2 = alloca [978 x i32], align 8
  %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 2
  %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 2
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  store volatile i32 %i0, i32 *%vptr
  store volatile i32 %i1, i32 *%vptr
  store volatile i32 %i3, i32 *%vptr
  store volatile i32 %i4, i32 *%vptr
  store volatile i32 %i5, i32 *%vptr
  ret void
}

; And again with maximum register pressure. The only spill slots that the
; NOFP case needs are the emergency ones, so the offsets are the same as for f2.
; The FP case needs to spill an extra register and is too dependent on
; register allocation heuristics for a stable test.
define void @f11(i32 *%vptr) {
; CHECK-NOFP-LABEL: f11:
; CHECK-NOFP: stmg %r6, %r15,
; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
; CHECK-NOFP: lay [[REGISTER]], 4096(%r15)
; CHECK-NOFP: mvhi 0([[REGISTER]]), 42
; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
; CHECK-NOFP: lmg %r6, %r15,
; CHECK-NOFP: br %r14
;
; Fourteen values are loaded up front and only stored back after the two
; stores of 42, so all of them stay live across the out-of-range accesses.
  %i0 = load volatile i32, i32 *%vptr
  %i1 = load volatile i32, i32 *%vptr
  %i3 = load volatile i32, i32 *%vptr
  %i4 = load volatile i32, i32 *%vptr
  %i5 = load volatile i32, i32 *%vptr
  %i6 = load volatile i32, i32 *%vptr
  %i7 = load volatile i32, i32 *%vptr
  %i8 = load volatile i32, i32 *%vptr
  %i9 = load volatile i32, i32 *%vptr
  %i10 = load volatile i32, i32 *%vptr
  %i11 = load volatile i32, i32 *%vptr
  %i12 = load volatile i32, i32 *%vptr
  %i13 = load volatile i32, i32 *%vptr
  %i14 = load volatile i32, i32 *%vptr
  %region1 = alloca [978 x i32], align 8
  %region2 = alloca [978 x i32], align 8
  %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 2
  %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 2
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  store volatile i32 %i0, i32 *%vptr
  store volatile i32 %i1, i32 *%vptr
  store volatile i32 %i3, i32 *%vptr
  store volatile i32 %i4, i32 *%vptr
  store volatile i32 %i5, i32 *%vptr
  store volatile i32 %i6, i32 *%vptr
  store volatile i32 %i7, i32 *%vptr
  store volatile i32 %i8, i32 *%vptr
  store volatile i32 %i9, i32 *%vptr
  store volatile i32 %i10, i32 *%vptr
  store volatile i32 %i11, i32 *%vptr
  store volatile i32 %i12, i32 *%vptr
  store volatile i32 %i13, i32 *%vptr
  store volatile i32 %i14, i32 *%vptr
  ret void
}