; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Tests for the llvm.amdgcn.image.load.1d intrinsic under GlobalISel on three
; subtargets (tahiti, fiji, gfx1010). Every function takes the <8 x i32>
; resource in SGPRs (inreg) and the coordinate %s in a VGPR. The first
; intrinsic operand is the value printed as "dmask" in the expected assembly.

; Single-channel loads: dmask 1, 2, 4 and 8 for the x, y, z and w variants.

define amdgpu_ps float @load_1d_f32_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_f32_x:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_f32_x:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

define amdgpu_ps float @load_1d_f32_y(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_f32_y:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_f32_y:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_y:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

define amdgpu_ps float @load_1d_f32_z(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_f32_z:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_f32_z:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_z:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

define amdgpu_ps float @load_1d_f32_w(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_f32_w:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_f32_w:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_w:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

; Two-channel loads returning <2 x float>: dmask 3 (xy), 5 (xz), 9 (xw) and
; 6 (yz). The expected result register pair is v[0:1] in every case.

define amdgpu_ps <2 x float> @load_1d_v2f32_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v2f32_xy:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v2f32_xy:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

define amdgpu_ps <2 x float> @load_1d_v2f32_xz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v2f32_xz:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v2f32_xz:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_xz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

define amdgpu_ps <2 x float> @load_1d_v2f32_xw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v2f32_xw:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v2f32_xw:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_xw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

define amdgpu_ps <2 x float> @load_1d_v2f32_yz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v2f32_yz:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v2f32_yz:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_yz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

; Three- and four-channel loads: dmask 7 (xyz) and 15 (xyzw).

define amdgpu_ps <3 x float> @load_1d_v3f32_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v3f32_xyz:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v3f32_xyz:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f32_xyz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %v
}

define amdgpu_ps <4 x float> @load_1d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v4f32_xyzw:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v4f32_xyzw:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v4f32_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; TFE variants: the fourth intrinsic operand is 1 and the intrinsic returns a
; { data, i32 } struct. Each function returns only the trailing i32 (bitcast to
; float), so the expected code loads one register more than the data needs
; and copies that last register into v0. The expected assembly carries the
; "tfe" modifier.

define amdgpu_ps float @load_1d_f32_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_f32_tfe_dmask_x:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, v1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_f32_tfe_dmask_x:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_tfe_dmask_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v1
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { float, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

define amdgpu_ps float @load_1d_v2f32_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v2f32_tfe_dmask_xy:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 unorm tfe
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, v2
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v2f32_tfe_dmask_xy:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 unorm tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_tfe_dmask_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <2 x float>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

define amdgpu_ps float @load_1d_v3f32_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v3f32_tfe_dmask_xyz:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 unorm tfe
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, v3
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v3f32_tfe_dmask_xyz:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 unorm tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f32_tfe_dmask_xyz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v3
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <3 x float>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; Four data channels plus the trailing i32. The dmask operand is 15 (x|y|z|w),
; matching the function name and the non-TFE load_1d_v4f32_xyzw test; it was
; previously 16, which is not one of the four channel bits.
; NOTE(review): the expected lines for this function were updated by hand
; following the pattern of the x / xy / xyz TFE tests (N data registers plus
; one more, result copied from the last register). Regenerate them with
; utils/update_llc_test_checks.py to confirm.

define amdgpu_ps float @load_1d_v4f32_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:4], v0, s[0:7] dmask:0xf unorm tfe
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, v4
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:4], v0, s[0:7] dmask:0xf unorm tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v4
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:4], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v4
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; dmask operand of 0 together with TFE: the expected assembly still shows
; dmask:0x1 and a two-register result, identical to the dmask_x TFE test.

define amdgpu_ps float @load_1d_f32_tfe_dmask_0(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_f32_tfe_dmask_0:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, v1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_f32_tfe_dmask_0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_tfe_dmask_0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v1
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { float, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; Intrinsic declarations: plain data results first, then the struct-returning
; forms used by the TFE tests.
declare float @llvm.amdgcn.image.load.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0

declare { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }