; Test 16-bit conditional stores that are presented as selects. The volatile
; tests require z10, which use a branch instead of a LOCR.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

declare void @foo(i16 *)

; Test the simple case, with the loaded value first.
; The load+select+store sequence should fold into a conditional store:
; return early (blr) when %limit is "low" (original value kept in memory),
; otherwise fall through and STH the %alt value.  CHECK-NOT: %r2 verifies
; that the loaded value is never materialized into a register.
define void @f1(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; ...and with the loaded value second
; Same fold, but the branch condition is inverted (bher instead of blr).
define void @f2(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %alt, i16 %orig
  store i16 %res, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly sign-extended to 32 bits, with the
; loaded value first.
; The sext/trunc round trip should not block the conditional-store fold:
; the expected assembly is identical to the plain i16 case in @f1.
define void @f3(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = sext i16 %orig to i32
  %res = select i1 %cond, i32 %ext, i32 %alt
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f4(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = sext i16 %orig to i32
  %res = select i1 %cond, i32 %alt, i32 %ext
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly zero-extended to 32 bits, with the
; loaded value first.
define void @f5(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = zext i16 %orig to i32
  %res = select i1 %cond, i32 %ext, i32 %alt
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f6(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f6:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = zext i16 %orig to i32
  %res = select i1 %cond, i32 %alt, i32 %ext
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly sign-extended to 64 bits, with the
; loaded value first.
; As with the 32-bit variants, the 64-bit sext/trunc round trip must not
; defeat the fold — expected output matches @f1/@f2 exactly.
define void @f7(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f7:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = sext i16 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f8(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f8:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = sext i16 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly zero-extended to 64 bits, with the
; loaded value first.
define void @f9(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f9:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = zext i16 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f10(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f10:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = zext i16 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Check the high end of the aligned STH range.
; Offset 4094 = 2047 * sizeof(i16), the largest displacement that still
; fits the short STH form.
define void @f11(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f11:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 4094(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 2047
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the next halfword up, which should use STHY instead of STH.
define void @f12(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f12:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sthy %r3, 4096(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 2048
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the high end of the aligned STHY range.
; 524286 = 262143 * 2 is the largest positive displacement STHY accepts.
define void @f13(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f13:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sthy %r3, 524286(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 262143
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the next halfword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f14(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f14:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 262144
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the low end of the STHY range.
; -524288 = -262144 * 2 is the most negative displacement STHY accepts.
define void @f15(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f15:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sthy %r3, -524288(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 -262144
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the next halfword down, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f16(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f16:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524290
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 -262145
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check that STHY allows an index.
; The base+index+4096 address computed via inttoptr should be matched
; directly by STHY's base+index+displacement addressing form.
define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) {
; CHECK-LABEL: f17:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sthy %r4, 4096(%r3,%r2)
; CHECK: br %r14
  %add1 = add i64 %base, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i16 *
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check that volatile loads are not matched.
; Here the load must execute unconditionally (lh), so only the store is
; made conditional via an explicit branch around it.
define void @f18(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f18:
; CHECK: lh {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: sth {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load volatile i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; ...likewise stores.
; In this case we should have a conditional load into %r3.
; The volatile store must execute unconditionally, so the select is
; implemented by conditionally skipping the load of the original value.
define void @f19(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f19:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store volatile i16 %res, i16 *%ptr
  ret void
}

; Check that atomic loads are not matched. The transformation is OK for
; the "unordered" case tested here, but since we don't try to handle atomic
; operations at all in this context, it seems better to assert that than
; to restrict the test to a stronger ordering.
define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; CHECK-LABEL: f20:
; CHECK: lh {{%r[0-9]+}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: sth {{%r[0-9]+}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load atomic i16 , i16 *%ptr unordered, align 2
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; ...likewise stores.
define void @f21(i16 *%ptr, i16 %alt, i32 %limit) {
; FIXME: should use a normal store instead of CS.
; CHECK-LABEL: f21:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store atomic i16 %res, i16 *%ptr unordered, align 2
  ret void
}

; Try a frame index base.
; The calls to @foo force %ptr to be a real stack slot, so the conditional
; store is addressed off the frame pointer %r15.  CHECK-NOT: %r15 verifies
; that no other frame access sneaks in between the branch and the store.
define void @f22(i16 %alt, i32 %limit) {
; CHECK-LABEL: f22:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: sth {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %ptr = alloca i16
  call void @foo(i16 *%ptr)
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  call void @foo(i16 *%ptr)
  ret void
}