; Test 32-bit conditional stores that are presented as selects.
;
; Each function loads a word, selects between the loaded value and %alt based
; on an unsigned compare of %limit against 420, and stores the result back to
; the same address.  The patterns pin the instruction sequence expected from
; llc for that load/select/store shape.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

; External function taking an i32 pointer; called by @f18.
declare void @foo(i32 *)

; Test the simple case, with the loaded value first.
; The select keeps the loaded value when %limit < 420, so the expected code
; is a conditional return (blr) followed by a plain store of %alt (%r3).
define void @f1(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; ...and with the loaded value second
; The select operands are swapped relative to @f1, so the expected
; conditional return uses the opposite condition (bher instead of blr).
define void @f2(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %alt, i32 %orig
  store i32 %res, i32 *%ptr
  ret void
}

; Test cases where the value is explicitly sign-extended to 64 bits, with the
; loaded value first.
; Here %alt is an i64: the loaded word is widened with sext, the select is
; done on i64, and the result is truncated back to i32 before the store.
define void @f3(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %ext = sext i32 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f4(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %ext = sext i32 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; Test cases where the value is explicitly zero-extended to 64 bits, with the
; loaded value first.
; In @f5 the loaded word is widened with zext to i64, selected against the
; i64 %alt, and truncated back to i32 before the store.
define void @f5(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %ext = zext i32 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f6(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f6:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %ext = zext i32 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; Check the high end of the aligned ST range.
; Element 1023 of an i32 array is byte offset 1023 * 4 = 4092, which is the
; displacement expected in the st below.
define void @f7(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f7:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 4092(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 1023
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the next word up, which should use STY instead of ST.
; Element 1024 is byte offset 1024 * 4 = 4096.
define void @f8(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f8:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sty %r3, 4096(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 1024
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the high end of the aligned STY range.
; In @f9, element 131071 of an i32 array is byte offset 131071 * 4 = 524284,
; which is the displacement expected in the sty below.
define void @f9(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f9:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sty %r3, 524284(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 131071
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element 131072 is byte offset 131072 * 4 = 524288; the patterns expect the
; offset to be added to the base with agfi and the store to use displacement 0.
define void @f10(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f10:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 131072
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the low end of the STY range.
; Element -131072 is byte offset -131072 * 4 = -524288.
define void @f11(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f11:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sty %r3, -524288(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 -131072
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
; In @f12, element -131073 of an i32 array is byte offset -131073 * 4 =
; -524292; the patterns expect the offset to be added to the base with agfi
; and the store to use displacement 0.
define void @f12(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f12:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524292
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 -131073
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check that STY allows an index.
; The address is built as %base + %index + 4096 in integer arithmetic and
; converted with inttoptr; the patterns expect base, index and displacement
; to be folded into a single sty.
define void @f13(i64 %base, i64 %index, i32 %alt, i32 %limit) {
; CHECK-LABEL: f13:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sty %r4, 4096(%r3,%r2)
; CHECK: br %r14
  %add1 = add i64 %base, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i32 *
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check that volatile loads are not matched.
; The patterns expect the load to stay as an explicit l, followed by a branch
; to a label and a store, instead of the conditional-return form used above.
define void @f14(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f14:
; CHECK: l {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load volatile i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; ...likewise stores.  In this case we should have a conditional load into %r3.
; The store is volatile here, so the patterns expect it to be executed on
; both paths; only the load is branched around.
define void @f15(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f15:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: l %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store volatile i32 %res, i32 *%ptr
  ret void
}

; Check that atomic loads are not matched.
; The transformation is OK for
; the "unordered" case tested here, but since we don't try to handle atomic
; operations at all in this context, it seems better to assert that than
; to restrict the test to a stronger ordering.
define void @f16(i32 *%ptr, i32 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; NOTE(review): the patterns below already expect a plain load (l) and no CS,
; so this FIXME looks stale -- confirm before removing it.
; CHECK-LABEL: f16:
; CHECK: l {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load atomic i32, i32 *%ptr unordered, align 4
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; ...likewise stores.
; The store is the atomic (unordered) operation here; the patterns expect it
; to be executed on both paths with only the load branched around.
define void @f17(i32 *%ptr, i32 %alt, i32 %limit) {
; FIXME: should use a normal store instead of CS.
; NOTE(review): the patterns below already expect a plain store (st) and no
; CS, so this FIXME looks stale -- confirm before removing it.
; CHECK-LABEL: f17:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: l %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Try a frame index base.
; %ptr is an alloca whose address is passed to @foo before and after the
; load/select/store.  The patterns expect a jl around a store addressed off
; %r15, sitting between the two calls, with no other %r15 use in between.
define void @f18(i32 %alt, i32 %limit) {
; CHECK-LABEL: f18:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: st {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %ptr = alloca i32
  call void @foo(i32 *%ptr)
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  call void @foo(i32 *%ptr)
  ret void
}