; Verify that ANDs whose operands are both in memory are selected as NC.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

@g1src = global i8 1
@g1dst = global i8 1
@g2src = global i16 2
@g2dst = global i16 2

; Plain i8 AND: the byte at %ptr1 is ANDed into the byte that follows it.
define void @f1(i8* %ptr1) {
; CHECK-LABEL: f1:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
  %next = getelementptr i8, i8* %ptr1, i64 1
  %src = load i8, i8* %ptr1
  %dst = load i8, i8* %next
  %res = and i8 %src, %dst
  store i8 %res, i8* %next
  ret void
}

; The same AND with its two operands commuted.
define void @f2(i8* %ptr1) {
; CHECK-LABEL: f2:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
  %next = getelementptr i8, i8* %ptr1, i64 1
  %src = load i8, i8* %ptr1
  %dst = load i8, i8* %next
  %res = and i8 %dst, %src
  store i8 %res, i8* %next
  ret void
}

; i8 AND performed in i32: the source byte is zero-extended and the
; destination byte is sign-extended before the result is truncated back.
define void @f3(i8* %ptr1) {
; CHECK-LABEL: f3:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
  %next = getelementptr i8, i8* %ptr1, i64 1
  %src = load i8, i8* %ptr1
  %src.ext = zext i8 %src to i32
  %dst = load i8, i8* %next
  %dst.ext = sext i8 %dst to i32
  %res = and i32 %src.ext, %dst.ext
  %narrow = trunc i32 %res to i8
  store i8 %narrow, i8* %next
  ret void
}

; As f3, with the two kinds of extension swapped.
define void @f4(i8* %ptr1) {
; CHECK-LABEL: f4:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
  %next = getelementptr i8, i8* %ptr1, i64 1
  %src = load i8, i8* %ptr1
  %src.ext = sext i8 %src to i32
  %dst = load i8, i8* %next
  %dst.ext = zext i8 %dst to i32
  %res = and i32 %src.ext, %dst.ext
  %narrow = trunc i32 %res to i8
  store i8 %narrow, i8* %next
  ret void
}

; Same again, this time with both values sign-extended.
define void @f5(i8* %ptr1) {
; CHECK-LABEL: f5:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
  ; Both bytes are sign-extended to i32 before the AND.
  %next = getelementptr i8, i8* %ptr1, i64 1
  %src = load i8, i8* %ptr1
  %src.ext = sext i8 %src to i32
  %dst = load i8, i8* %next
  %dst.ext = sext i8 %dst to i32
  %res = and i32 %src.ext, %dst.ext
  %narrow = trunc i32 %res to i8
  store i8 %narrow, i8* %next
  ret void
}

; Same again, this time with both values zero-extended.
define void @f6(i8* %ptr1) {
; CHECK-LABEL: f6:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
  %next = getelementptr i8, i8* %ptr1, i64 1
  %src = load i8, i8* %ptr1
  %src.ext = zext i8 %src to i32
  %dst = load i8, i8* %next
  %dst.ext = zext i8 %dst to i32
  %res = and i32 %src.ext, %dst.ext
  %narrow = trunc i32 %res to i8
  store i8 %narrow, i8* %next
  ret void
}

; i8 AND performed in i64 (a single mixed-extension case is enough here).
define void @f7(i8* %ptr1) {
; CHECK-LABEL: f7:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
  %next = getelementptr i8, i8* %ptr1, i64 1
  %src = load i8, i8* %ptr1
  %src.ext = sext i8 %src to i64
  %dst = load i8, i8* %next
  %dst.ext = zext i8 %dst to i64
  %res = and i64 %src.ext, %dst.ext
  %narrow = trunc i64 %res to i8
  store i8 %narrow, i8* %next
  ret void
}

; Plain i16 AND of two adjacent halfwords.
define void @f8(i16* %ptr1) {
; CHECK-LABEL: f8:
; CHECK: nc 2(2,%r2), 0(%r2)
; CHECK: br %r14
  %next = getelementptr i16, i16* %ptr1, i64 1
  %src = load i16, i16* %ptr1
  %dst = load i16, i16* %next
  %res = and i16 %src, %dst
  store i16 %res, i16* %next
  ret void
}

; i16 AND where the values are extended to 32 bits first.
define void @f9(i16* %ptr1) {
; CHECK-LABEL: f9:
; CHECK: nc 2(2,%r2), 0(%r2)
; CHECK: br %r14
  ; The source is zero-extended and the destination sign-extended to i32.
  %next = getelementptr i16, i16* %ptr1, i64 1
  %src = load i16, i16* %ptr1
  %src.ext = zext i16 %src to i32
  %dst = load i16, i16* %next
  %dst.ext = sext i16 %dst to i32
  %res = and i32 %src.ext, %dst.ext
  %narrow = trunc i32 %res to i16
  store i16 %narrow, i16* %next
  ret void
}

; i16 AND where the values are extended to 64 bits first.
define void @f10(i16* %ptr1) {
; CHECK-LABEL: f10:
; CHECK: nc 2(2,%r2), 0(%r2)
; CHECK: br %r14
  %next = getelementptr i16, i16* %ptr1, i64 1
  %src = load i16, i16* %ptr1
  %src.ext = sext i16 %src to i64
  %dst = load i16, i16* %next
  %dst.ext = zext i16 %dst to i64
  %res = and i64 %src.ext, %dst.ext
  %narrow = trunc i64 %res to i16
  store i16 %narrow, i16* %next
  ret void
}

; Plain i32 AND of two adjacent words.
define void @f11(i32* %ptr1) {
; CHECK-LABEL: f11:
; CHECK: nc 4(4,%r2), 0(%r2)
; CHECK: br %r14
  %next = getelementptr i32, i32* %ptr1, i64 1
  %src = load i32, i32* %ptr1
  %dst = load i32, i32* %next
  %res = and i32 %dst, %src
  store i32 %res, i32* %next
  ret void
}

; i32 AND where the values are extended to 64 bits first.
define void @f12(i32* %ptr1) {
; CHECK-LABEL: f12:
; CHECK: nc 4(4,%r2), 0(%r2)
; CHECK: br %r14
  %next = getelementptr i32, i32* %ptr1, i64 1
  %src = load i32, i32* %ptr1
  %src.ext = sext i32 %src to i64
  %dst = load i32, i32* %next
  %dst.ext = zext i32 %dst to i64
  %res = and i64 %src.ext, %dst.ext
  %narrow = trunc i64 %res to i32
  store i32 %narrow, i32* %next
  ret void
}

; Plain i64 AND of two adjacent doublewords.
define void @f13(i64* %ptr1) {
; CHECK-LABEL: f13:
; CHECK: nc 8(8,%r2), 0(%r2)
; CHECK: br %r14
  ; The destination is the doubleword immediately after the source.
  %next = getelementptr i64, i64* %ptr1, i64 1
  %src = load i64, i64* %ptr1
  %dst = load i64, i64* %next
  %res = and i64 %dst, %src
  store i64 %res, i64* %next
  ret void
}

; NC must not be used when the first load is volatile.
define void @f14(i64* %ptr1) {
; CHECK-LABEL: f14:
; CHECK-NOT: nc
; CHECK: br %r14
  %next = getelementptr i64, i64* %ptr1, i64 1
  %src = load volatile i64, i64* %ptr1
  %dst = load i64, i64* %next
  %res = and i64 %dst, %src
  store i64 %res, i64* %next
  ret void
}

; ...nor when the second load is volatile.
define void @f15(i64* %ptr1) {
; CHECK-LABEL: f15:
; CHECK-NOT: nc
; CHECK: br %r14
  %next = getelementptr i64, i64* %ptr1, i64 1
  %src = load i64, i64* %ptr1
  %dst = load volatile i64, i64* %next
  %res = and i64 %dst, %src
  store i64 %res, i64* %next
  ret void
}

; ...nor when the store is volatile.
define void @f16(i64* %ptr1) {
; CHECK-LABEL: f16:
; CHECK-NOT: nc
; CHECK: br %r14
  %next = getelementptr i64, i64* %ptr1, i64 1
  %src = load i64, i64* %ptr1
  %dst = load i64, i64* %next
  %res = and i64 %dst, %src
  store volatile i64 %res, i64* %next
  ret void
}

; NC must not be used for aligned loads and stores when nothing tells us
; whether the two pointers alias: the addresses could be equal, and we
; do not want NC in that situation.
define void @f17(i64* %ptr1, i64* %ptr2) {
; CHECK-LABEL: f17:
; CHECK-NOT: nc
; CHECK: br %r14
  %src = load i64, i64* %ptr1
  %dst = load i64, i64* %ptr2
  %res = and i64 %dst, %src
  store i64 %res, i64* %ptr2
  ret void
}

; ...and likewise when one of the loads isn't naturally aligned, since
; aliasing still can't be ruled out.
define void @f18(i64* %ptr1, i64* %ptr2) {
; CHECK-LABEL: f18:
; CHECK-NOT: nc
; CHECK: br %r14
  ; Only the load through %ptr1 carries the reduced (2-byte) alignment.
  %src = load i64, i64* %ptr1, align 2
  %dst = load i64, i64* %ptr2
  %res = and i64 %dst, %src
  store i64 %res, i64* %ptr2
  ret void
}

; The previous test again, with the AND operands commuted.
define void @f19(i64* %ptr1, i64* %ptr2) {
; CHECK-LABEL: f19:
; CHECK-NOT: nc
; CHECK: br %r14
  %src = load i64, i64* %ptr1, align 2
  %dst = load i64, i64* %ptr2
  %res = and i64 %src, %dst
  store i64 %res, i64* %ptr2
  ret void
}

; ...and once more with the reduced alignment on the other operand
; (both the load from and the store to %ptr2).
define void @f20(i64* %ptr1, i64* %ptr2) {
; CHECK-LABEL: f20:
; CHECK-NOT: nc
; CHECK: br %r14
  %src = load i64, i64* %ptr1
  %dst = load i64, i64* %ptr2, align 2
  %res = and i64 %src, %dst
  store i64 %res, i64* %ptr2, align 2
  ret void
}

; The two doublewords definitely overlap here: the second address is the
; first plus one byte.
define void @f21(i64 %base) {
; CHECK-LABEL: f21:
; CHECK-NOT: nc
; CHECK: br %r14
  %shifted = add i64 %base, 1
  %lo = inttoptr i64 %base to i64*
  %hi = inttoptr i64 %shifted to i64*
  %src = load i64, i64* %lo
  %dst = load i64, i64* %hi, align 1
  %res = and i64 %dst, %src
  store i64 %res, i64* %hi, align 1
  ret void
}

; NC can be used between two i8 globals.
define void @f22(i8* %ptr) {
; CHECK-LABEL: f22:
; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src
; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
; CHECK: nc 0(1,[[DST]]), 0([[SRC]])
; CHECK: br %r14
  %src = load i8, i8* @g1src
  %dst = load i8, i8* @g1dst
  %res = and i8 %src, %dst
  store i8 %res, i8* @g1dst
  ret void
}

; NC is still chosen for i16 globals, where LHRL and STHRL are available.
define void @f23(i16* %ptr) {
; CHECK-LABEL: f23:
; CHECK-DAG: larl [[SRC:%r[0-5]]], g2src
; CHECK-DAG: larl [[DST:%r[0-5]]], g2dst
; CHECK: nc 0(2,[[DST]]), 0([[SRC]])
; CHECK: br %r14
  ; Both operands are the i16 globals; %ptr is unused.
  %src = load i16, i16* @g2src
  %dst = load i16, i16* @g2dst
  %res = and i16 %src, %dst
  store i16 %res, i16* @g2dst
  ret void
}

; The accesses are only byte-aligned, but the destination sits a fixed
; 8 bytes past the source, so the offsets alone are enough to
; disambiguate them.
define void @f24(i64* %ptr1) {
; CHECK-LABEL: f24:
; CHECK: nc 8(8,%r2), 0(%r2)
; CHECK: br %r14
  %next = getelementptr i64, i64* %ptr1, i64 1
  %src = load i64, i64* %ptr1, align 1
  %dst = load i64, i64* %next, align 1
  %res = and i64 %dst, %src
  store i64 %res, i64* %next, align 1
  ret void
}

; TBAA says the two pointers don't alias: the source access is tagged
; with set1 and the destination accesses with set2.
define void @f25(i64* %ptr1, i64* %ptr2) {
; CHECK-LABEL: f25:
; CHECK: nc 0(8,%r3), 0(%r2)
; CHECK: br %r14
  %src = load i64, i64* %ptr1, align 2, !tbaa !3
  %dst = load i64, i64* %ptr2, align 2, !tbaa !4
  %res = and i64 %dst, %src
  store i64 %res, i64* %ptr2, align 2, !tbaa !4
  ret void
}

; TBAA tags are present but all name the same set, so they don't help.
define void @f26(i64* %ptr1, i64* %ptr2) {
; CHECK-LABEL: f26:
; CHECK-NOT: nc
; CHECK: br %r14
  %src = load i64, i64* %ptr1, align 2, !tbaa !3
  %dst = load i64, i64* %ptr2, align 2, !tbaa !3
  %res = and i64 %dst, %src
  store i64 %res, i64* %ptr2, align 2, !tbaa !3
  ret void
}

; TBAA type tree: two sibling sets under one root, plus an access tag
; for each set.
!0 = !{ !"root" }
!1 = !{ !"set1", !0 }
!2 = !{ !"set2", !0 }
!3 = !{ !1, !1, i64 0}
!4 = !{ !2, !2, i64 0}