; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-PACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Codegen checks for the half-typed (d16) 1D image-load intrinsics under
; GlobalISel. The four llc invocations above compile this file for tonga,
; gfx810, gfx900 and gfx1010; their output is matched against the
; GFX8-UNPACKED, GFX8-PACKED, GFX9 and GFX10 check prefixes respectively.
; The check lines are autogenerated (see the note on the first line), so
; regenerate them with that script rather than editing them by hand.
;
; Every function takes the resource descriptor as an inreg <8 x i32>; the
; checks show it being copied from s[2:9] down to s[0:7] before the load.

; Scalar half result, dmask operand 1 (checks expect dmask:0x1 on every target).
define amdgpu_ps half @load_1d_f16_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_x:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_x:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_x:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_x:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %v
}

; Scalar half result, dmask operand 2 (checks expect dmask:0x2 on every target).
define amdgpu_ps half @load_1d_f16_y(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_y:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_y:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_y:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_y:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %v
}

; Scalar half result, dmask operand 4 (checks expect dmask:0x4 on every target).
define amdgpu_ps half @load_1d_f16_z(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_z:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_z:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_z:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_z:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %v
}

; Scalar half result, dmask operand 8 (checks expect dmask:0x8 on every target).
define amdgpu_ps half @load_1d_f16_w(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_w:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_w:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_w:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_w:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %v
}

; <2 x half> result, dmask operand 3. The tonga checks load two VGPRs
; (v[0:1]) and then pack them into v0 with v_and / v_lshlrev / v_or_sdwa;
; the other three targets load straight into a single VGPR.
define amdgpu_ps <2 x half> @load_1d_v2f16_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_xy:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_xy:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x3 unorm d16
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_xy:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x3 unorm d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_xy:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}

; <2 x half> result, dmask operand 5. Expected code has the same shape as
; load_1d_v2f16_xy; only the dmask value in the image_load differs.
define amdgpu_ps <2 x half> @load_1d_v2f16_xz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_xz:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x5 unorm d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_xz:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x5 unorm d16
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_xz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x5 unorm d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_xz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}

; <2 x half> result, dmask operand 9. Expected code has the same shape as
; load_1d_v2f16_xy; only the dmask value in the image_load differs.
define amdgpu_ps <2 x half> @load_1d_v2f16_xw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_xw:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_xw:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x9 unorm d16
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_xw:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x9 unorm d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_xw:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}

; <2 x half> result, dmask operand 6. Expected code has the same shape as
; load_1d_v2f16_xy; only the dmask value in the image_load differs.
define amdgpu_ps <2 x half> @load_1d_v2f16_yz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_yz:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x6 unorm d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_yz:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x6 unorm d16
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_yz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x6 unorm d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_yz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}

; <3 x half> result, dmask operand 7. The tonga checks load three VGPRs
; (v[0:2]) while the other targets load two (v[0:1]); every target then
; emits extra mask / shift / or instructions to form the returned v0 and v1.
define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v3f16_xyz:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, 0xffff
; GFX8-UNPACKED-NEXT: s_and_b32 s1, s0, s0
; GFX8-UNPACKED-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, s1
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_and_b32_e32 v4, s0, v1
; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v3f16_xyz:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
; GFX8-PACKED-NEXT: s_mov_b32 s0, 0xffff
; GFX8-PACKED-NEXT: s_and_b32 s0, s0, s0
; GFX8-PACKED-NEXT: s_lshl_b32 s0, s0, 16
; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, s0
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX8-PACKED-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3
; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v3f16_xyz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: s_lshl_b32 s0, s0, 16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s0
; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f16_xyz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_lshl_b32 s0, s0, 16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s0
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v2
; GFX10-NEXT: ; return to shader part epilog
  %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x half> %v
}

; <4 x half> result, dmask operand 15. The tonga checks load four VGPRs
; (v[0:3]) and pack them pairwise into v0 and v1; the other targets load
; v[0:1] directly with no follow-up instructions.
define amdgpu_ps <4 x half> @load_1d_v4f16_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v4f16_xyzw:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm d16
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, 0xffff
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, s0, v1
; GFX8-UNPACKED-NEXT: v_and_b32_e32 v3, s0, v3
; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v4f16_xyzw:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v4f16_xyzw:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v4f16_xyzw:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
  %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %v
}

; tfe variant of the scalar load: the intrinsic returns { half, i32 } and its
; fourth operand is 1, which appears as the `tfe` modifier in the checks.
; Only the i32 member is returned (bitcast to float), so every target loads
; v[0:1] and copies v1 into v0.
define amdgpu_ps float @load_1d_f16_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_tfe_dmask_x:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v1
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_tfe_dmask_x:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v1
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_tfe_dmask_x:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_tfe_dmask_x:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v1
; GFX10-NEXT: ; return to shader part epilog
  %v = call { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { half, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; tfe variant with a { <2 x half>, i32 } result and dmask operand 3; only the
; i32 member is returned. The tonga checks load v[0:2] and return v2, while
; the other targets load v[0:1] and return v1.
define amdgpu_ps float @load_1d_v2f16_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v2
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v1
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v1
; GFX10-NEXT: ; return to shader part epilog
  %v = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <2 x half>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; tfe variant with a { <3 x half>, i32 } result and dmask operand 7; only the
; i32 member is returned. The tonga checks load v[0:3] and return v3, while
; the other targets load v[0:2] and return v2.
define amdgpu_ps float @load_1d_v3f16_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v[0:3], v0, s[0:7] dmask:0x7 unorm tfe d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v3
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm tfe d16
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v2
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm tfe d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: ; return to shader part epilog
  %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <3 x half>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x10 unorm tfe d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v1
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX8-PACKED: ; %bb.0:
; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x10 unorm tfe d16
;
GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) 881; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v1 882; GFX8-PACKED-NEXT: ; return to shader part epilog 883; 884; GFX9-LABEL: load_1d_v4f16_tfe_dmask_xyzw: 885; GFX9: ; %bb.0: 886; GFX9-NEXT: s_mov_b32 s0, s2 887; GFX9-NEXT: s_mov_b32 s1, s3 888; GFX9-NEXT: s_mov_b32 s2, s4 889; GFX9-NEXT: s_mov_b32 s3, s5 890; GFX9-NEXT: s_mov_b32 s4, s6 891; GFX9-NEXT: s_mov_b32 s5, s7 892; GFX9-NEXT: s_mov_b32 s6, s8 893; GFX9-NEXT: s_mov_b32 s7, s9 894; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x10 unorm tfe d16 895; GFX9-NEXT: s_waitcnt vmcnt(0) 896; GFX9-NEXT: v_mov_b32_e32 v0, v1 897; GFX9-NEXT: ; return to shader part epilog 898; 899; GFX10-LABEL: load_1d_v4f16_tfe_dmask_xyzw: 900; GFX10: ; %bb.0: 901; GFX10-NEXT: s_mov_b32 s0, s2 902; GFX10-NEXT: s_mov_b32 s1, s3 903; GFX10-NEXT: s_mov_b32 s2, s4 904; GFX10-NEXT: s_mov_b32 s3, s5 905; GFX10-NEXT: s_mov_b32 s4, s6 906; GFX10-NEXT: s_mov_b32 s5, s7 907; GFX10-NEXT: s_mov_b32 s6, s8 908; GFX10-NEXT: s_mov_b32 s7, s9 909; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe d16 910; GFX10-NEXT: s_waitcnt vmcnt(0) 911; GFX10-NEXT: v_mov_b32_e32 v0, v1 912; GFX10-NEXT: ; return to shader part epilog 913 %v = call { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 16, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) 914 %v.err = extractvalue { <4 x half>, i32 } %v, 1 915 %vv = bitcast i32 %v.err to float 916 ret float %vv 917} 918 919declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 920declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 921declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 922declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 923 924declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, 
<8 x i32>, i32 immarg, i32 immarg) #0 925declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 926declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 927declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 928 929attributes #0 = { nounwind readonly } 930