; RUN: not --crash llc > /dev/null < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s

; Test that atomic loads are assembled properly.

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

;===----------------------------------------------------------------------------
; Atomic loads: 32-bit
;===----------------------------------------------------------------------------

; Basic load.

; CHECK-LABEL: load_i32_no_offset:
; CHECK: i32.atomic.load $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @load_i32_no_offset(i32 *%p) {
  %v = load atomic i32, i32* %p seq_cst, align 4
  ret i32 %v
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: load_i32_with_folded_offset:
; CHECK: i32.atomic.load $push0=, 24($0){{$}}
define i32 @load_i32_with_folded_offset(i32* %p) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = load atomic i32, i32* %s seq_cst, align 4
  ret i32 %t
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: load_i32_with_folded_gep_offset:
; CHECK: i32.atomic.load $push0=, 24($0){{$}}
define i32 @load_i32_with_folded_gep_offset(i32* %p) {
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %t = load atomic i32, i32* %s seq_cst, align 4
  ret i32 %t
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
define i32 @load_i32_with_unfolded_gep_negative_offset(i32* %p) {
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %t = load atomic i32, i32* %s seq_cst, align 4
  ret i32 %t
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: load_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
define i32 @load_i32_with_unfolded_offset(i32* %p) {
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = load atomic i32, i32* %s seq_cst, align 4
  ret i32 %t
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: load_i32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
define i32 @load_i32_with_unfolded_gep_offset(i32* %p) {
  %s = getelementptr i32, i32* %p, i32 6
  %t = load atomic i32, i32* %s seq_cst, align 4
  ret i32 %t
}

; When loading from a fixed address, materialize a zero.
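; (A compile-time-constant address fits entirely in the instruction's constant
; offset field, so the only thing left to materialize for the base operand is
; an i32.const 0, as the checks below expect.)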

; CHECK-LABEL: load_i32_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load $push1=, 42($pop0){{$}}
define i32 @load_i32_from_numeric_address() {
  %s = inttoptr i32 42 to i32*
  %t = load atomic i32, i32* %s seq_cst, align 4
  ret i32 %t
}

; CHECK-LABEL: load_i32_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load $push1=, gv($pop0){{$}}
@gv = global i32 0
define i32 @load_i32_from_global_address() {
  %t = load atomic i32, i32* @gv seq_cst, align 4
  ret i32 %t
}

;===----------------------------------------------------------------------------
; Atomic loads: 64-bit
;===----------------------------------------------------------------------------

; Basic load.

; CHECK-LABEL: load_i64_no_offset:
; CHECK: i64.atomic.load $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @load_i64_no_offset(i64 *%p) {
  %v = load atomic i64, i64* %p seq_cst, align 8
  ret i64 %v
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: load_i64_with_folded_offset:
; CHECK: i64.atomic.load $push0=, 24($0){{$}}
define i64 @load_i64_with_folded_offset(i64* %p) {
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %t = load atomic i64, i64* %s seq_cst, align 8
  ret i64 %t
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: load_i64_with_folded_gep_offset:
; CHECK: i64.atomic.load $push0=, 24($0){{$}}
define i64 @load_i64_with_folded_gep_offset(i64* %p) {
  %s = getelementptr inbounds i64, i64* %p, i32 3
  %t = load atomic i64, i64* %s seq_cst, align 8
  ret i64 %t
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
define i64 @load_i64_with_unfolded_gep_negative_offset(i64* %p) {
  %s = getelementptr inbounds i64, i64* %p, i32 -3
  %t = load atomic i64, i64* %s seq_cst, align 8
  ret i64 %t
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: load_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
define i64 @load_i64_with_unfolded_offset(i64* %p) {
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %t = load atomic i64, i64* %s seq_cst, align 8
  ret i64 %t
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: load_i64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
define i64 @load_i64_with_unfolded_gep_offset(i64* %p) {
  %s = getelementptr i64, i64* %p, i32 3
  %t = load atomic i64, i64* %s seq_cst, align 8
  ret i64 %t
}

;===----------------------------------------------------------------------------
; Atomic stores: 32-bit
;===----------------------------------------------------------------------------

; Basic store.
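; (Unlike the loads above, atomic stores produce no value, so nothing is pushed
; onto the value stack and the functions below return void.)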

; CHECK-LABEL: store_i32_no_offset:
; CHECK-NEXT: .functype store_i32_no_offset (i32, i32) -> (){{$}}
; CHECK-NEXT: i32.atomic.store 0($0), $1{{$}}
; CHECK-NEXT: return{{$}}
define void @store_i32_no_offset(i32 *%p, i32 %v) {
  store atomic i32 %v, i32* %p seq_cst, align 4
  ret void
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: store_i32_with_folded_offset:
; CHECK: i32.atomic.store 24($0), $pop0{{$}}
define void @store_i32_with_folded_offset(i32* %p) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  store atomic i32 0, i32* %s seq_cst, align 4
  ret void
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: store_i32_with_folded_gep_offset:
; CHECK: i32.atomic.store 24($0), $pop0{{$}}
define void @store_i32_with_folded_gep_offset(i32* %p) {
  %s = getelementptr inbounds i32, i32* %p, i32 6
  store atomic i32 0, i32* %s seq_cst, align 4
  ret void
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: store_i32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.store 0($pop1), $pop2{{$}}
define void @store_i32_with_unfolded_gep_negative_offset(i32* %p) {
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  store atomic i32 0, i32* %s seq_cst, align 4
  ret void
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: store_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.store 0($pop1), $pop2{{$}}
define void @store_i32_with_unfolded_offset(i32* %p) {
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  store atomic i32 0, i32* %s seq_cst, align 4
  ret void
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: store_i32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.store 0($pop1), $pop2{{$}}
define void @store_i32_with_unfolded_gep_offset(i32* %p) {
  %s = getelementptr i32, i32* %p, i32 6
  store atomic i32 0, i32* %s seq_cst, align 4
  ret void
}

; When storing to a fixed address, materialize a zero.

; CHECK-LABEL: store_i32_to_numeric_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK-NEXT: i32.const $push1=, 0{{$}}
; CHECK-NEXT: i32.atomic.store 42($pop0), $pop1{{$}}
define void @store_i32_to_numeric_address() {
  %s = inttoptr i32 42 to i32*
  store atomic i32 0, i32* %s seq_cst, align 4
  ret void
}

; CHECK-LABEL: store_i32_to_global_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.const $push1=, 0{{$}}
; CHECK: i32.atomic.store gv($pop0), $pop1{{$}}
define void @store_i32_to_global_address() {
  store atomic i32 0, i32* @gv seq_cst, align 4
  ret void
}

;===----------------------------------------------------------------------------
; Atomic stores: 64-bit
;===----------------------------------------------------------------------------

; Basic store.

; CHECK-LABEL: store_i64_no_offset:
; CHECK-NEXT: .functype store_i64_no_offset (i32, i64) -> (){{$}}
; CHECK-NEXT: i64.atomic.store 0($0), $1{{$}}
; CHECK-NEXT: return{{$}}
define void @store_i64_no_offset(i64 *%p, i64 %v) {
  store atomic i64 %v, i64* %p seq_cst, align 8
  ret void
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: store_i64_with_folded_offset:
; CHECK: i64.atomic.store 24($0), $pop0{{$}}
define void @store_i64_with_folded_offset(i64* %p) {
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  store atomic i64 0, i64* %s seq_cst, align 8
  ret void
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: store_i64_with_folded_gep_offset:
; CHECK: i64.atomic.store 24($0), $pop0{{$}}
define void @store_i64_with_folded_gep_offset(i64* %p) {
  %s = getelementptr inbounds i64, i64* %p, i32 3
  store atomic i64 0, i64* %s seq_cst, align 8
  ret void
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: store_i64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.store 0($pop1), $pop2{{$}}
define void @store_i64_with_unfolded_gep_negative_offset(i64* %p) {
  %s = getelementptr inbounds i64, i64* %p, i32 -3
  store atomic i64 0, i64* %s seq_cst, align 8
  ret void
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: store_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.store 0($pop1), $pop2{{$}}
define void @store_i64_with_unfolded_offset(i64* %p) {
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  store atomic i64 0, i64* %s seq_cst, align 8
  ret void
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: store_i64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.store 0($pop1), $pop2{{$}}
define void @store_i64_with_unfolded_gep_offset(i64* %p) {
  %s = getelementptr i64, i64* %p, i32 3
  store atomic i64 0, i64* %s seq_cst, align 8
  ret void
}

;===----------------------------------------------------------------------------
; Atomic sign-extending loads
;===----------------------------------------------------------------------------

; Fold an offset into a sign-extending load.
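; (WebAssembly has no sign-extending atomic loads; the zero-extending
; i32.atomic.load8_u/load16_u forms are selected, followed by a separate
; extend8_s/extend16_s from the sign-ext feature enabled in the RUN line.)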

; CHECK-LABEL: load_i8_i32_s_with_folded_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @load_i8_i32_s_with_folded_offset(i8* %p) {
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %t = load atomic i8, i8* %s seq_cst, align 1
  %u = sext i8 %t to i32
  ret i32 %u
}

; 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s
; CHECK-LABEL: load_i32_i64_s_with_folded_offset:
; CHECK: i32.atomic.load $push0=, 24($0){{$}}
; CHECK-NEXT: i64.extend_i32_s $push1=, $pop0{{$}}
define i64 @load_i32_i64_s_with_folded_offset(i32* %p) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = load atomic i32, i32* %s seq_cst, align 4
  %u = sext i32 %t to i64
  ret i64 %u
}

; Fold a gep offset into a sign-extending load.

; CHECK-LABEL: load_i8_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @load_i8_i32_s_with_folded_gep_offset(i8* %p) {
  %s = getelementptr inbounds i8, i8* %p, i32 24
  %t = load atomic i8, i8* %s seq_cst, align 1
  %u = sext i8 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i16_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.load16_u $push0=, 48($0){{$}}
; CHECK-NEXT: i32.extend16_s $push1=, $pop0
define i32 @load_i16_i32_s_with_folded_gep_offset(i16* %p) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = load atomic i16, i16* %s seq_cst, align 2
  %u = sext i16 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i16_i64_s_with_folded_gep_offset:
; CHECK: i64.atomic.load16_u $push0=, 48($0){{$}}
; CHECK-NEXT: i64.extend16_s $push1=, $pop0
define i64 @load_i16_i64_s_with_folded_gep_offset(i16* %p) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = load atomic i16, i16* %s seq_cst, align 2
  %u = sext i16 %t to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: load_i8_i32_s_with_folded_or_offset:
; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i32 @load_i8_i32_s_with_folded_or_offset(i32 %x) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t1 = load atomic i8, i8* %arrayidx seq_cst, align 1
  %conv = sext i8 %t1 to i32
  ret i32 %conv
}

; CHECK-LABEL: load_i8_i64_s_with_folded_or_offset:
; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i64 @load_i8_i64_s_with_folded_or_offset(i32 %x) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t1 = load atomic i8, i8* %arrayidx seq_cst, align 1
  %conv = sext i8 %t1 to i64
  ret i64 %conv
}

; When loading from a fixed address, materialize a zero.

; CHECK-LABEL: load_i16_i32_s_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}}
; CHECK-NEXT: i32.extend16_s $push2=, $pop1
define i32 @load_i16_i32_s_from_numeric_address() {
  %s = inttoptr i32 42 to i16*
  %t = load atomic i16, i16* %s seq_cst, align 2
  %u = sext i16 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i8_i32_s_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}}
; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
@gv8 = global i8 0
define i32 @load_i8_i32_s_from_global_address() {
  %t = load atomic i8, i8* @gv8 seq_cst, align 1
  %u = sext i8 %t to i32
  ret i32 %u
}

;===----------------------------------------------------------------------------
; Atomic zero-extending loads
;===----------------------------------------------------------------------------

; Fold an offset into a zero-extending load.

; CHECK-LABEL: load_i8_i32_z_with_folded_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
define i32 @load_i8_i32_z_with_folded_offset(i8* %p) {
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %t = load atomic i8, i8* %s seq_cst, align 1
  %u = zext i8 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i32_i64_z_with_folded_offset:
; CHECK: i64.atomic.load32_u $push0=, 24($0){{$}}
define i64 @load_i32_i64_z_with_folded_offset(i32* %p) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = load atomic i32, i32* %s seq_cst, align 4
  %u = zext i32 %t to i64
  ret i64 %u
}

; Fold a gep offset into a zero-extending load.

; CHECK-LABEL: load_i8_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
define i32 @load_i8_i32_z_with_folded_gep_offset(i8* %p) {
  %s = getelementptr inbounds i8, i8* %p, i32 24
  %t = load atomic i8, i8* %s seq_cst, align 1
  %u = zext i8 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i16_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.load16_u $push0=, 48($0){{$}}
define i32 @load_i16_i32_z_with_folded_gep_offset(i16* %p) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = load atomic i16, i16* %s seq_cst, align 2
  %u = zext i16 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i16_i64_z_with_folded_gep_offset:
; CHECK: i64.atomic.load16_u $push0=, 48($0){{$}}
define i64 @load_i16_i64_z_with_folded_gep_offset(i16* %p) {
  %s = getelementptr inbounds i16, i16* %p, i64 24
  %t = load atomic i16, i16* %s seq_cst, align 2
  %u = zext i16 %t to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.
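; (In the following tests, the 'and' with -4 clears the low two bits of %x, so
; the +2 byte offset from the gep has no bits in common with the base; the add
; becomes an 'or', and the 2 can still be folded into the offset field.)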

; CHECK-LABEL: load_i8_i32_z_with_folded_or_offset:
; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
define i32 @load_i8_i32_z_with_folded_or_offset(i32 %x) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t1 = load atomic i8, i8* %arrayidx seq_cst, align 1
  %conv = zext i8 %t1 to i32
  ret i32 %conv
}

; CHECK-LABEL: load_i8_i64_z_with_folded_or_offset:
; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
define i64 @load_i8_i64_z_with_folded_or_offset(i32 %x) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t1 = load atomic i8, i8* %arrayidx seq_cst, align 1
  %conv = zext i8 %t1 to i64
  ret i64 %conv
}

; When loading from a fixed address, materialize a zero.

; CHECK-LABEL: load_i16_i32_z_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}}
define i32 @load_i16_i32_z_from_numeric_address() {
  %s = inttoptr i32 42 to i16*
  %t = load atomic i16, i16* %s seq_cst, align 2
  %u = zext i16 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i8_i32_z_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}}
define i32 @load_i8_i32_z_from_global_address() {
  %t = load atomic i8, i8* @gv8 seq_cst, align 1
  %u = zext i8 %t to i32
  ret i32 %u
}

; i8 return value should test anyext loads

; CHECK-LABEL: load_i8_i32_retvalue:
; CHECK: i32.atomic.load8_u $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i8 @load_i8_i32_retvalue(i8 *%p) {
  %v = load atomic i8, i8* %p seq_cst, align 1
  ret i8 %v
}

;===----------------------------------------------------------------------------
; Atomic truncating stores
;===----------------------------------------------------------------------------

; Fold an offset into a truncating store.

; CHECK-LABEL: store_i8_i32_with_folded_offset:
; CHECK: i32.atomic.store8 24($0), $1{{$}}
define void @store_i8_i32_with_folded_offset(i8* %p, i32 %v) {
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %t = trunc i32 %v to i8
  store atomic i8 %t, i8* %s seq_cst, align 1
  ret void
}

; CHECK-LABEL: store_i32_i64_with_folded_offset:
; CHECK: i64.atomic.store32 24($0), $1{{$}}
define void @store_i32_i64_with_folded_offset(i32* %p, i64 %v) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = trunc i64 %v to i32
  store atomic i32 %t, i32* %s seq_cst, align 4
  ret void
}

; Fold a gep offset into a truncating store.
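; (The narrow atomic.store8/store16/store32 forms store only the low bits of a
; full-width operand, so the IR trunc is folded away and $1 is used directly.)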

; CHECK-LABEL: store_i8_i32_with_folded_gep_offset:
; CHECK: i32.atomic.store8 24($0), $1{{$}}
define void @store_i8_i32_with_folded_gep_offset(i8* %p, i32 %v) {
  %s = getelementptr inbounds i8, i8* %p, i32 24
  %t = trunc i32 %v to i8
  store atomic i8 %t, i8* %s seq_cst, align 1
  ret void
}

; CHECK-LABEL: store_i16_i32_with_folded_gep_offset:
; CHECK: i32.atomic.store16 48($0), $1{{$}}
define void @store_i16_i32_with_folded_gep_offset(i16* %p, i32 %v) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = trunc i32 %v to i16
  store atomic i16 %t, i16* %s seq_cst, align 2
  ret void
}

; CHECK-LABEL: store_i16_i64_with_folded_gep_offset:
; CHECK: i64.atomic.store16 48($0), $1{{$}}
define void @store_i16_i64_with_folded_gep_offset(i16* %p, i64 %v) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = trunc i64 %v to i16
  store atomic i16 %t, i16* %s seq_cst, align 2
  ret void
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: store_i8_i32_with_folded_or_offset:
; CHECK: i32.atomic.store8 2($pop{{[0-9]+}}), $1{{$}}
define void @store_i8_i32_with_folded_or_offset(i32 %x, i32 %v) {
  %and = and i32 %x, -4
  %p = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %p, i32 2
  %t = trunc i32 %v to i8
  store atomic i8 %t, i8* %arrayidx seq_cst, align 1
  ret void
}

; CHECK-LABEL: store_i8_i64_with_folded_or_offset:
; CHECK: i64.atomic.store8 2($pop{{[0-9]+}}), $1{{$}}
define void @store_i8_i64_with_folded_or_offset(i32 %x, i64 %v) {
  %and = and i32 %x, -4
  %p = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %p, i32 2
  %t = trunc i64 %v to i8
  store atomic i8 %t, i8* %arrayidx seq_cst, align 1
  ret void
}

;===----------------------------------------------------------------------------
; Atomic binary read-modify-writes: 32-bit
;===----------------------------------------------------------------------------

; There are several RMW instructions, but here we only test 'add' as an example.

; Basic RMW.

; CHECK-LABEL: rmw_add_i32_no_offset:
; CHECK-NEXT: .functype rmw_add_i32_no_offset (i32, i32) -> (i32){{$}}
; CHECK: i32.atomic.rmw.add $push0=, 0($0), $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @rmw_add_i32_no_offset(i32* %p, i32 %v) {
  %old = atomicrmw add i32* %p, i32 %v seq_cst
  ret i32 %old
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: rmw_add_i32_with_folded_offset:
; CHECK: i32.atomic.rmw.add $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i32_with_folded_offset(i32* %p, i32 %v) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %old = atomicrmw add i32* %s, i32 %v seq_cst
  ret i32 %old
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: rmw_add_i32_with_folded_gep_offset:
; CHECK: i32.atomic.rmw.add $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i32_with_folded_gep_offset(i32* %p, i32 %v) {
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %old = atomicrmw add i32* %s, i32 %v seq_cst
  ret i32 %old
}

; We can't fold a negative offset though, even with an inbounds gep.
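; (The offset field in WebAssembly memory instructions is an unsigned
; immediate, so a negative displacement cannot be encoded there and must be
; added to the base pointer explicitly.)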

; CHECK-LABEL: rmw_add_i32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i32 @rmw_add_i32_with_unfolded_gep_negative_offset(i32* %p, i32 %v) {
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %old = atomicrmw add i32* %s, i32 %v seq_cst
  ret i32 %old
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: rmw_add_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i32 @rmw_add_i32_with_unfolded_offset(i32* %p, i32 %v) {
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %old = atomicrmw add i32* %s, i32 %v seq_cst
  ret i32 %old
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: rmw_add_i32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i32 @rmw_add_i32_with_unfolded_gep_offset(i32* %p, i32 %v) {
  %s = getelementptr i32, i32* %p, i32 6
  %old = atomicrmw add i32* %s, i32 %v seq_cst
  ret i32 %old
}

; When loading from a fixed address, materialize a zero.

; CHECK-LABEL: rmw_add_i32_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw.add $push1=, 42($pop0), $0{{$}}
define i32 @rmw_add_i32_from_numeric_address(i32 %v) {
  %s = inttoptr i32 42 to i32*
  %old = atomicrmw add i32* %s, i32 %v seq_cst
  ret i32 %old
}

; CHECK-LABEL: rmw_add_i32_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw.add $push1=, gv($pop0), $0{{$}}
define i32 @rmw_add_i32_from_global_address(i32 %v) {
  %old = atomicrmw add i32* @gv, i32 %v seq_cst
  ret i32 %old
}

;===----------------------------------------------------------------------------
; Atomic binary read-modify-writes: 64-bit
;===----------------------------------------------------------------------------

; Basic RMW.

; CHECK-LABEL: rmw_add_i64_no_offset:
; CHECK-NEXT: .functype rmw_add_i64_no_offset (i32, i64) -> (i64){{$}}
; CHECK: i64.atomic.rmw.add $push0=, 0($0), $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @rmw_add_i64_no_offset(i64* %p, i64 %v) {
  %old = atomicrmw add i64* %p, i64 %v seq_cst
  ret i64 %old
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: rmw_add_i64_with_folded_offset:
; CHECK: i64.atomic.rmw.add $push0=, 24($0), $1{{$}}
define i64 @rmw_add_i64_with_folded_offset(i64* %p, i64 %v) {
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %old = atomicrmw add i64* %s, i64 %v seq_cst
  ret i64 %old
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: rmw_add_i64_with_folded_gep_offset:
; CHECK: i64.atomic.rmw.add $push0=, 24($0), $1{{$}}
define i64 @rmw_add_i64_with_folded_gep_offset(i64* %p, i64 %v) {
  %s = getelementptr inbounds i64, i64* %p, i32 3
  %old = atomicrmw add i64* %s, i64 %v seq_cst
  ret i64 %old
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: rmw_add_i64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i64 @rmw_add_i64_with_unfolded_gep_negative_offset(i64* %p, i64 %v) {
  %s = getelementptr inbounds i64, i64* %p, i32 -3
  %old = atomicrmw add i64* %s, i64 %v seq_cst
  ret i64 %old
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: rmw_add_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i64 @rmw_add_i64_with_unfolded_offset(i64* %p, i64 %v) {
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %old = atomicrmw add i64* %s, i64 %v seq_cst
  ret i64 %old
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: rmw_add_i64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i64 @rmw_add_i64_with_unfolded_gep_offset(i64* %p, i64 %v) {
  %s = getelementptr i64, i64* %p, i32 3
  %old = atomicrmw add i64* %s, i64 %v seq_cst
  ret i64 %old
}

;===----------------------------------------------------------------------------
; Atomic truncating & sign-extending binary RMWs
;===----------------------------------------------------------------------------

; Fold an offset into a sign-extending rmw.

; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_offset:
; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @rmw_add_i8_i32_s_with_folded_offset(i8* %p, i32 %v) {
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %s, i8 %t seq_cst
  %u = sext i8 %old to i32
  ret i32 %u
}

; 32->64 sext rmw gets selected as i32.atomic.rmw.add, i64.extend_i32_s
; CHECK-LABEL: rmw_add_i32_i64_s_with_folded_offset:
; CHECK: i32.wrap_i64 $push0=, $1
; CHECK-NEXT: i32.atomic.rmw.add $push1=, 24($0), $pop0{{$}}
; CHECK-NEXT: i64.extend_i32_s $push2=, $pop1{{$}}
define i64 @rmw_add_i32_i64_s_with_folded_offset(i32* %p, i64 %v) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = trunc i64 %v to i32
  %old = atomicrmw add i32* %s, i32 %t seq_cst
  %u = sext i32 %old to i64
  ret i64 %u
}

; Fold a gep offset into a sign-extending rmw.

; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @rmw_add_i8_i32_s_with_folded_gep_offset(i8* %p, i32 %v) {
  %s = getelementptr inbounds i8, i8* %p, i32 24
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %s, i8 %t seq_cst
  %u = sext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i16_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.rmw16.add_u $push0=, 48($0), $1{{$}}
; CHECK-NEXT: i32.extend16_s $push1=, $pop0
define i32 @rmw_add_i16_i32_s_with_folded_gep_offset(i16* %p, i32 %v) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = trunc i32 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  %u = sext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i16_i64_s_with_folded_gep_offset:
; CHECK: i64.atomic.rmw16.add_u $push0=, 48($0), $1{{$}}
; CHECK-NEXT: i64.extend16_s $push1=, $pop0
define i64 @rmw_add_i16_i64_s_with_folded_gep_offset(i16* %p, i64 %v) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = trunc i64 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  %u = sext i16 %old to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_or_offset:
; CHECK: i32.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i32 @rmw_add_i8_i32_s_with_folded_or_offset(i32 %x, i32 %v) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %arrayidx, i8 %t seq_cst
  %conv = sext i8 %old to i32
  ret i32 %conv
}

; CHECK-LABEL: rmw_add_i8_i64_s_with_folded_or_offset:
; CHECK: i64.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i64 @rmw_add_i8_i64_s_with_folded_or_offset(i32 %x, i64 %v) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t = trunc i64 %v to i8
  %old = atomicrmw add i8* %arrayidx, i8 %t seq_cst
  %conv = sext i8 %old to i64
  ret i64 %conv
}

; When loading from a fixed address, materialize a zero.

; CHECK-LABEL: rmw_add_i16_i32_s_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16.add_u $push1=, 42($pop0), $0{{$}}
; CHECK-NEXT: i32.extend16_s $push2=, $pop1
define i32 @rmw_add_i16_i32_s_from_numeric_address(i32 %v) {
  %s = inttoptr i32 42 to i16*
  %t = trunc i32 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  %u = sext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i8_i32_s_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8.add_u $push1=, gv8($pop0), $0{{$}}
; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
define i32 @rmw_add_i8_i32_s_from_global_address(i32 %v) {
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* @gv8, i8 %t seq_cst
  %u = sext i8 %old to i32
  ret i32 %u
}

;===----------------------------------------------------------------------------
; Atomic truncating & zero-extending binary RMWs
;===----------------------------------------------------------------------------

; Fold an offset into a zero-extending rmw.

; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_offset:
; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i8_i32_z_with_folded_offset(i8* %p, i32 %v) {
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %s, i8 %t seq_cst
  %u = zext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i32_i64_z_with_folded_offset:
; CHECK: i64.atomic.rmw32.add_u $push0=, 24($0), $1{{$}}
define i64 @rmw_add_i32_i64_z_with_folded_offset(i32* %p, i64 %v) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = trunc i64 %v to i32
  %old = atomicrmw add i32* %s, i32 %t seq_cst
  %u = zext i32 %old to i64
  ret i64 %u
}

; Fold a gep offset into a zero-extending rmw.

; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i8_i32_z_with_folded_gep_offset(i8* %p, i32 %v) {
  %s = getelementptr inbounds i8, i8* %p, i32 24
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %s, i8 %t seq_cst
  %u = zext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i16_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.rmw16.add_u $push0=, 48($0), $1{{$}}
define i32 @rmw_add_i16_i32_z_with_folded_gep_offset(i16* %p, i32 %v) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = trunc i32 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  %u = zext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i16_i64_z_with_folded_gep_offset:
; CHECK: i64.atomic.rmw16.add_u $push0=, 48($0), $1{{$}}
define i64 @rmw_add_i16_i64_z_with_folded_gep_offset(i16* %p, i64 %v) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = trunc i64 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  %u = zext i16 %old to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_or_offset:
; CHECK: i32.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
define i32 @rmw_add_i8_i32_z_with_folded_or_offset(i32 %x, i32 %v) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %arrayidx, i8 %t seq_cst
  %conv = zext i8 %old to i32
  ret i32 %conv
}

; CHECK-LABEL: rmw_add_i8_i64_z_with_folded_or_offset:
; CHECK: i64.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
define i64 @rmw_add_i8_i64_z_with_folded_or_offset(i32 %x, i64 %v) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t = trunc i64 %v to i8
  %old = atomicrmw add i8* %arrayidx, i8 %t seq_cst
  %conv = zext i8 %old to i64
  ret i64 %conv
}

; When loading from a fixed address, materialize a zero.

; CHECK-LABEL: rmw_add_i16_i32_z_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16.add_u $push1=, 42($pop0), $0{{$}}
define i32 @rmw_add_i16_i32_z_from_numeric_address(i32 %v) {
  %s = inttoptr i32 42 to i16*
  %t = trunc i32 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  %u = zext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i8_i32_z_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8.add_u $push1=, gv8($pop0), $0{{$}}
define i32 @rmw_add_i8_i32_z_from_global_address(i32 %v) {
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* @gv8, i8 %t seq_cst
  %u = zext i8 %old to i32
  ret i32 %u
}

; i8 return value should test anyext RMWs

; CHECK-LABEL: rmw_add_i8_i32_retvalue:
; CHECK: i32.atomic.rmw8.add_u $push0=, 0($0), $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i8 @rmw_add_i8_i32_retvalue(i8 *%p, i32 %v) {
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %p, i8 %t seq_cst
  ret i8 %old
}

;===----------------------------------------------------------------------------
; Atomic ternary read-modify-writes: 32-bit
;===----------------------------------------------------------------------------

; Basic RMW.

; CHECK-LABEL: cmpxchg_i32_no_offset:
; CHECK-NEXT: .functype cmpxchg_i32_no_offset (i32, i32, i32) -> (i32){{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @cmpxchg_i32_no_offset(i32* %p, i32 %exp, i32 %new) {
  %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: cmpxchg_i32_with_folded_offset:
; CHECK: i32.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
define i32 @cmpxchg_i32_with_folded_offset(i32* %p, i32 %exp, i32 %new) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; With an inbounds gep, we can fold an offset.
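; (An inbounds gep promises that the offset arithmetic stays within the
; underlying object and does not wrap, so the constant displacement can safely
; be moved into the instruction's offset field.)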

; CHECK-LABEL: cmpxchg_i32_with_folded_gep_offset:
; CHECK: i32.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
define i32 @cmpxchg_i32_with_folded_gep_offset(i32* %p, i32 %exp, i32 %new) {
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: cmpxchg_i32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i32 @cmpxchg_i32_with_unfolded_gep_negative_offset(i32* %p, i32 %exp, i32 %new) {
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: cmpxchg_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i32 @cmpxchg_i32_with_unfolded_offset(i32* %p, i32 %exp, i32 %new) {
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: cmpxchg_i32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i32 @cmpxchg_i32_with_unfolded_gep_offset(i32* %p, i32 %exp, i32 %new) {
  %s = getelementptr i32, i32* %p, i32 6
  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; When loading from a fixed address, materialize a zero.

; CHECK-LABEL: cmpxchg_i32_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push1=, 42($pop0), $0, $1{{$}}
define i32 @cmpxchg_i32_from_numeric_address(i32 %exp, i32 %new) {
  %s = inttoptr i32 42 to i32*
  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; CHECK-LABEL: cmpxchg_i32_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push1=, gv($pop0), $0, $1{{$}}
define i32 @cmpxchg_i32_from_global_address(i32 %exp, i32 %new) {
  %pair = cmpxchg i32* @gv, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

;===----------------------------------------------------------------------------
; Atomic ternary read-modify-writes: 64-bit
;===----------------------------------------------------------------------------

; Basic RMW.

; CHECK-LABEL: cmpxchg_i64_no_offset:
; CHECK-NEXT: .functype cmpxchg_i64_no_offset (i32, i64, i64) -> (i64){{$}}
; CHECK: i64.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @cmpxchg_i64_no_offset(i64* %p, i64 %exp, i64 %new) {
  %pair = cmpxchg i64* %p, i64 %exp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: cmpxchg_i64_with_folded_offset:
; CHECK: i64.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
define i64 @cmpxchg_i64_with_folded_offset(i64* %p, i64 %exp, i64 %new) {
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: cmpxchg_i64_with_folded_gep_offset:
; CHECK: i64.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
define i64 @cmpxchg_i64_with_folded_gep_offset(i64* %p, i64 %exp, i64 %new) {
  %s = getelementptr inbounds i64, i64* %p, i32 3
  %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: cmpxchg_i64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i64 @cmpxchg_i64_with_unfolded_gep_negative_offset(i64* %p, i64 %exp, i64 %new) {
  %s = getelementptr inbounds i64, i64* %p, i32 -3
  %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: cmpxchg_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i64 @cmpxchg_i64_with_unfolded_offset(i64* %p, i64 %exp, i64 %new) {
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: cmpxchg_i64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i64 @cmpxchg_i64_with_unfolded_gep_offset(i64* %p, i64 %exp, i64 %new) {
  %s = getelementptr i64, i64* %p, i32 3
  %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}

;===----------------------------------------------------------------------------
; Atomic truncating & sign-extending ternary RMWs
;===----------------------------------------------------------------------------

; Fold an offset into a sign-extending rmw.

; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_offset:
; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @cmpxchg_i8_i32_s_with_folded_offset(i8* %p, i32 %exp, i32 %new) {
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg i8* %s, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %u = sext i8 %old to i32
  ret i32 %u
}

; 32->64 sext rmw gets selected as i32.atomic.rmw.cmpxchg, i64.extend_i32_s
; CHECK-LABEL: cmpxchg_i32_i64_s_with_folded_offset:
; CHECK: i32.wrap_i64 $push1=, $1
; CHECK-NEXT: i32.wrap_i64 $push0=, $2
; CHECK-NEXT: i32.atomic.rmw.cmpxchg $push2=, 24($0), $pop1, $pop0{{$}}
; CHECK-NEXT: i64.extend_i32_s $push3=, $pop2{{$}}
define i64 @cmpxchg_i32_i64_s_with_folded_offset(i32* %p, i64 %exp, i64 %new) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %exp_t = trunc i64 %exp to i32
  %new_t = trunc i64 %new to i32
  %pair = cmpxchg i32* %s, i32 %exp_t, i32 %new_t seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  %u = sext i32 %old to i64
  ret i64 %u
}

; Fold a gep offset into a sign-extending rmw.

; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @cmpxchg_i8_i32_s_with_folded_gep_offset(i8* %p, i32 %exp, i32 %new) {
  %s = getelementptr inbounds i8, i8* %p, i32 24
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg i8* %s, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %u = sext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i16_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}}
; CHECK-NEXT: i32.extend16_s $push1=, $pop0
define i32 @cmpxchg_i16_i32_s_with_folded_gep_offset(i16* %p, i32 %exp, i32 %new) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %exp_t = trunc i32 %exp to i16
  %new_t = trunc i32 %new to i16
  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  %old = extractvalue { i16, i1 } %pair, 0
  %u = sext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i16_i64_s_with_folded_gep_offset:
; CHECK: i64.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}}
; CHECK-NEXT: i64.extend16_s $push1=, $pop0
define i64 @cmpxchg_i16_i64_s_with_folded_gep_offset(i16* %p, i64 %exp, i64 %new) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %exp_t = trunc i64 %exp to i16
  %new_t = trunc i64 %new to i16
  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  %old = extractvalue { i16, i1 } %pair, 0
  %u = sext i16 %old to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_or_offset:
; CHECK: i32.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i32 @cmpxchg_i8_i32_s_with_folded_or_offset(i32 %x, i32 %exp, i32 %new) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg i8* %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %conv = sext i8 %old to i32
  ret i32 %conv
}

; CHECK-LABEL: cmpxchg_i8_i64_s_with_folded_or_offset:
; CHECK: i64.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i64 @cmpxchg_i8_i64_s_with_folded_or_offset(i32 %x, i64 %exp, i64 %new) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %exp_t = trunc i64 %exp to i8
  %new_t = trunc i64 %new to i8
  %pair = cmpxchg i8* %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %conv = sext i8 %old to i64
  ret i64 %conv
}

; When loading from a fixed address, materialize a zero.

; CHECK-LABEL: cmpxchg_i16_i32_s_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16.cmpxchg_u $push1=, 42($pop0), $0, $1{{$}}
; CHECK-NEXT: i32.extend16_s $push2=, $pop1
define i32 @cmpxchg_i16_i32_s_from_numeric_address(i32 %exp, i32 %new) {
  %s = inttoptr i32 42 to i16*
  %exp_t = trunc i32 %exp to i16
  %new_t = trunc i32 %new to i16
  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  %old = extractvalue { i16, i1 } %pair, 0
  %u = sext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i8_i32_s_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8.cmpxchg_u $push1=, gv8($pop0), $0, $1{{$}}
; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
define i32 @cmpxchg_i8_i32_s_from_global_address(i32 %exp, i32 %new) {
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg i8* @gv8, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %u = sext i8 %old to i32
  ret i32 %u
}

;===----------------------------------------------------------------------------
; Atomic truncating & zero-extending ternary RMWs
;===----------------------------------------------------------------------------

; Fold an offset into a zero-extending rmw.
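; (The narrow _u cmpxchg and rmw instructions already zero-extend their result,
; so unlike the sign-extending tests above no separate extend instruction is
; expected here.)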

; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_offset:
; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
define i32 @cmpxchg_i8_i32_z_with_folded_offset(i8* %p, i32 %exp, i32 %new) {
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg i8* %s, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %u = zext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i32_i64_z_with_folded_offset:
; CHECK: i64.atomic.rmw32.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
define i64 @cmpxchg_i32_i64_z_with_folded_offset(i32* %p, i64 %exp, i64 %new) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %exp_t = trunc i64 %exp to i32
  %new_t = trunc i64 %new to i32
  %pair = cmpxchg i32* %s, i32 %exp_t, i32 %new_t seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  %u = zext i32 %old to i64
  ret i64 %u
}

; Fold a gep offset into a zero-extending rmw.

; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
define i32 @cmpxchg_i8_i32_z_with_folded_gep_offset(i8* %p, i32 %exp, i32 %new) {
  %s = getelementptr inbounds i8, i8* %p, i32 24
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg i8* %s, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %u = zext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i16_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}}
define i32 @cmpxchg_i16_i32_z_with_folded_gep_offset(i16* %p, i32 %exp, i32 %new) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %exp_t = trunc i32 %exp to i16
  %new_t = trunc i32 %new to i16
  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  %old = extractvalue { i16, i1 } %pair, 0
  %u = zext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i16_i64_z_with_folded_gep_offset:
; CHECK: i64.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}}
define i64 @cmpxchg_i16_i64_z_with_folded_gep_offset(i16* %p, i64 %exp, i64 %new) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %exp_t = trunc i64 %exp to i16
  %new_t = trunc i64 %new to i16
  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  %old = extractvalue { i16, i1 } %pair, 0
  %u = zext i16 %old to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_or_offset:
; CHECK: i32.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
define i32 @cmpxchg_i8_i32_z_with_folded_or_offset(i32 %x, i32 %exp, i32 %new) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg i8* %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %conv = zext i8 %old to i32
  ret i32 %conv
}

; CHECK-LABEL: cmpxchg_i8_i64_z_with_folded_or_offset:
; CHECK: i64.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
define i64 @cmpxchg_i8_i64_z_with_folded_or_offset(i32 %x, i64 %exp, i64 %new) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %exp_t = trunc i64 %exp to i8
  %new_t = trunc i64 %new to i8
  %pair = cmpxchg i8* %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %conv = zext i8 %old to i64
  ret i64 %conv
}

; When loading from a fixed address, materialize a zero.

; CHECK-LABEL: cmpxchg_i16_i32_z_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16.cmpxchg_u $push1=, 42($pop0), $0, $1{{$}}
define i32 @cmpxchg_i16_i32_z_from_numeric_address(i32 %exp, i32 %new) {
  %s = inttoptr i32 42 to i16*
  %exp_t = trunc i32 %exp to i16
  %new_t = trunc i32 %new to i16
  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  %old = extractvalue { i16, i1 } %pair, 0
  %u = zext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i8_i32_z_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8.cmpxchg_u $push1=, gv8($pop0), $0, $1{{$}}
define i32 @cmpxchg_i8_i32_z_from_global_address(i32 %exp, i32 %new) {
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg i8* @gv8, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %u = zext i8 %old to i32
  ret i32 %u
}

;===----------------------------------------------------------------------------
; Waits: 32-bit
;===----------------------------------------------------------------------------

declare i32 @llvm.wasm.memory.atomic.wait32(i32*, i32, i64)

; Basic wait.

; CHECK-LABEL: wait32_no_offset:
; CHECK: memory.atomic.wait32 $push0=, 0($0), $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @wait32_no_offset(i32* %p, i32 %exp, i64 %timeout) {
  %v = call i32 @llvm.wasm.memory.atomic.wait32(i32* %p, i32 %exp, i64 %timeout)
  ret i32 %v
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: wait32_with_folded_offset:
; CHECK: memory.atomic.wait32 $push0=, 24($0), $1, $2{{$}}
define i32 @wait32_with_folded_offset(i32* %p, i32 %exp, i64 %timeout) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = call i32 @llvm.wasm.memory.atomic.wait32(i32* %s, i32 %exp, i64 %timeout)
  ret i32 %t
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: wait32_with_folded_gep_offset:
; CHECK: memory.atomic.wait32 $push0=, 24($0), $1, $2{{$}}
define i32 @wait32_with_folded_gep_offset(i32* %p, i32 %exp, i64 %timeout) {
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %t = call i32 @llvm.wasm.memory.atomic.wait32(i32* %s, i32 %exp, i64 %timeout)
  ret i32 %t
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: wait32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait32 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait32_with_unfolded_gep_negative_offset(i32* %p, i32 %exp, i64 %timeout) {
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %t = call i32 @llvm.wasm.memory.atomic.wait32(i32* %s, i32 %exp, i64 %timeout)
  ret i32 %t
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: wait32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait32 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait32_with_unfolded_offset(i32* %p, i32 %exp, i64 %timeout) {
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = call i32 @llvm.wasm.memory.atomic.wait32(i32* %s, i32 %exp, i64 %timeout)
  ret i32 %t
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: wait32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait32 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait32_with_unfolded_gep_offset(i32* %p, i32 %exp, i64 %timeout) {
  %s = getelementptr i32, i32* %p, i32 6
  %t = call i32 @llvm.wasm.memory.atomic.wait32(i32* %s, i32 %exp, i64 %timeout)
  ret i32 %t
}

; When waiting on a fixed address, materialize a zero.

; CHECK-LABEL: wait32_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: memory.atomic.wait32 $push1=, 42($pop0), $0, $1{{$}}
define i32 @wait32_from_numeric_address(i32 %exp, i64 %timeout) {
  %s = inttoptr i32 42 to i32*
  %t = call i32 @llvm.wasm.memory.atomic.wait32(i32* %s, i32 %exp, i64 %timeout)
  ret i32 %t
}

; CHECK-LABEL: wait32_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: memory.atomic.wait32 $push1=, gv($pop0), $0, $1{{$}}
define i32 @wait32_from_global_address(i32 %exp, i64 %timeout) {
  %t = call i32 @llvm.wasm.memory.atomic.wait32(i32* @gv, i32 %exp, i64 %timeout)
  ret i32 %t
}

;===----------------------------------------------------------------------------
; Waits: 64-bit
;===----------------------------------------------------------------------------

declare i32 @llvm.wasm.memory.atomic.wait64(i64*, i64, i64)

; Basic wait.

; CHECK-LABEL: wait64_no_offset:
; CHECK: memory.atomic.wait64 $push0=, 0($0), $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @wait64_no_offset(i64* %p, i64 %exp, i64 %timeout) {
  %v = call i32 @llvm.wasm.memory.atomic.wait64(i64* %p, i64 %exp, i64 %timeout)
  ret i32 %v
}

; With an nuw add, we can fold an offset.
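; For reference, a rough C-level shape of the folded-offset wait64 cases below
; (an illustrative sketch only, not part of this test; it assumes clang's
; __builtin_wasm_memory_atomic_wait64 builtin and a build with atomics
; enabled):
;
;   // Waits on p[3]; the 24-byte offset can fold into the wait64 instruction.
;   int wait_on_slot3(long long *p, long long expected, long long timeout_ns) {
;     return __builtin_wasm_memory_atomic_wait64(p + 3, expected, timeout_ns);
;   }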

; CHECK-LABEL: wait64_with_folded_offset:
; CHECK: memory.atomic.wait64 $push0=, 24($0), $1, $2{{$}}
define i32 @wait64_with_folded_offset(i64* %p, i64 %exp, i64 %timeout) {
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %t = call i32 @llvm.wasm.memory.atomic.wait64(i64* %s, i64 %exp, i64 %timeout)
  ret i32 %t
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: wait64_with_folded_gep_offset:
; CHECK: memory.atomic.wait64 $push0=, 24($0), $1, $2{{$}}
define i32 @wait64_with_folded_gep_offset(i64* %p, i64 %exp, i64 %timeout) {
  %s = getelementptr inbounds i64, i64* %p, i32 3
  %t = call i32 @llvm.wasm.memory.atomic.wait64(i64* %s, i64 %exp, i64 %timeout)
  ret i32 %t
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: wait64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait64 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait64_with_unfolded_gep_negative_offset(i64* %p, i64 %exp, i64 %timeout) {
  %s = getelementptr inbounds i64, i64* %p, i32 -3
  %t = call i32 @llvm.wasm.memory.atomic.wait64(i64* %s, i64 %exp, i64 %timeout)
  ret i32 %t
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: wait64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait64 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait64_with_unfolded_offset(i64* %p, i64 %exp, i64 %timeout) {
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %t = call i32 @llvm.wasm.memory.atomic.wait64(i64* %s, i64 %exp, i64 %timeout)
  ret i32 %t
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: wait64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait64 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait64_with_unfolded_gep_offset(i64* %p, i64 %exp, i64 %timeout) {
  %s = getelementptr i64, i64* %p, i32 3
  %t = call i32 @llvm.wasm.memory.atomic.wait64(i64* %s, i64 %exp, i64 %timeout)
  ret i32 %t
}

;===----------------------------------------------------------------------------
; Notifies
;===----------------------------------------------------------------------------

declare i32 @llvm.wasm.memory.atomic.notify(i32*, i32)

; Basic notify.

; CHECK-LABEL: notify_no_offset:
; CHECK: memory.atomic.notify $push0=, 0($0), $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @notify_no_offset(i32* %p, i32 %notify_count) {
  %v = call i32 @llvm.wasm.memory.atomic.notify(i32* %p, i32 %notify_count)
  ret i32 %v
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: notify_with_folded_offset:
; CHECK: memory.atomic.notify $push0=, 24($0), $1{{$}}
define i32 @notify_with_folded_offset(i32* %p, i32 %notify_count) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* %s, i32 %notify_count)
  ret i32 %t
}

; With an inbounds gep, we can fold an offset.
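; (Illustrative note, not checked by FileCheck: in these notify tests the
; second operand, %notify_count, is the maximum number of waiters to wake at
; the given address, and the i32 result is the number of waiters actually
; woken.)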

; CHECK-LABEL: notify_with_folded_gep_offset:
; CHECK: memory.atomic.notify $push0=, 24($0), $1{{$}}
define i32 @notify_with_folded_gep_offset(i32* %p, i32 %notify_count) {
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* %s, i32 %notify_count)
  ret i32 %t
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: notify_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.notify $push2=, 0($pop1), $1{{$}}
define i32 @notify_with_unfolded_gep_negative_offset(i32* %p, i32 %notify_count) {
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* %s, i32 %notify_count)
  ret i32 %t
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: notify_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.notify $push2=, 0($pop1), $1{{$}}
define i32 @notify_with_unfolded_offset(i32* %p, i32 %notify_count) {
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* %s, i32 %notify_count)
  ret i32 %t
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: notify_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.notify $push2=, 0($pop1), $1{{$}}
define i32 @notify_with_unfolded_gep_offset(i32* %p, i32 %notify_count) {
  %s = getelementptr i32, i32* %p, i32 6
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* %s, i32 %notify_count)
  ret i32 %t
}

; When notifying a fixed address, materialize a zero.

; CHECK-LABEL: notify_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: memory.atomic.notify $push1=, 42($pop0), $0{{$}}
define i32 @notify_from_numeric_address(i32 %notify_count) {
  %s = inttoptr i32 42 to i32*
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* %s, i32 %notify_count)
  ret i32 %t
}

; CHECK-LABEL: notify_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: memory.atomic.notify $push1=, gv($pop0), $0{{$}}
define i32 @notify_from_global_address(i32 %notify_count) {
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* @gv, i32 %notify_count)
  ret i32 %t
}
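
; For reference, a rough C sketch of how the wait32/notify intrinsics
; exercised above might be reached from source (an illustrative sketch only,
; not part of this test; it assumes clang's __builtin_wasm_memory_atomic_wait32
; and __builtin_wasm_memory_atomic_notify builtins and a threads-enabled,
; shared-memory build):
;
;   // If *addr still equals 'expected', block until notified or the timeout
;   // expires; another thread publishes a new value and wakes one waiter.
;   int wait_for_change(int *addr, int expected, long long timeout_ns) {
;     return __builtin_wasm_memory_atomic_wait32(addr, expected, timeout_ns);
;   }
;   void publish_and_wake(int *addr, int value) {
;     __atomic_store_n(addr, value, __ATOMIC_SEQ_CST);
;     __builtin_wasm_memory_atomic_notify(addr, 1);
;   }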