; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; Code generation test for the llvm.amdgcn.image.gather4.* intrinsics when the
; texture coordinates (and the clamp / lod operand, where present) are passed
; as half values.
;
; Every function below is an amdgpu_ps shader that issues a single gather4 with
; dmask 1 and returns the <4 x float> result. For both gfx900 and gfx1010 the
; expected assembly packs two 16-bit values into one VGPR
; (v_and_b32 + v_lshl_or_b32) and carries the a16 modifier on the image
; instruction. The expected gfx1010 output additionally carries a dim operand.
;
; The expected-output comments inside each function are generated; refresh them
; with utils/update_llc_test_checks.py instead of editing them by hand.

; Basic 2D gather: the packed s/t pair is the only address register.
define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX9-LABEL: gather4_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Cube gather: three half operands (s, t, face); the expected address operand
; is a two-register range.
define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
; GFX9-LABEL: gather4_cube:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_cube:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D-array gather: three half operands (s, t, slice).
define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
; GFX9-LABEL: gather4_2darray:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_2darray:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Depth-compare gather: zcompare is a full float and precedes the packed s/t
; register in the address operand.
define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: gather4_c_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Gather with a half LOD clamp operand following s/t.
define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
; GFX9-LABEL: gather4_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Depth-compare + clamp: three address dwords. The expected gfx900 output
; copies them into a contiguous register range, while the expected gfx1010
; output uses a bracketed register list instead.
define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
; GFX9-LABEL: gather4_c_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_mov_b32_e32 v5, v3
; GFX9-NEXT:    v_mov_b32_e32 v3, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_gather4_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Gather with a float bias operand ahead of the half s/t coordinates.
define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
; GFX9-LABEL: gather4_b_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_b_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Bias + depth-compare: two float operands followed by the packed s/t pair.
define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
; GFX9-LABEL: gather4_c_b_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_b_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Bias + clamp: float bias, packed s/t, then the half clamp in its own dword.
define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
; GFX9-LABEL: gather4_b_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_mov_b32_e32 v5, v3
; GFX9-NEXT:    v_mov_b32_e32 v3, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_gather4_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_b_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Bias + depth-compare + clamp: four address dwords in the expected output.
define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
; GFX9-LABEL: gather4_c_b_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_mov_b32_e32 v7, v4
; GFX9-NEXT:    v_mov_b32_e32 v4, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
; GFX9-NEXT:    v_mov_b32_e32 v5, v1
; GFX9-NEXT:    v_lshl_or_b32 v6, v3, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_b_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Explicit-LOD gather. Unlike the variants above, the expected output for the
; _l and _lz functions contains no s_wqm / exec save-and-restore sequence.
define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
; GFX9-LABEL: gather4_l_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT:    image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_l_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT:    image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Depth-compare + explicit LOD: three address dwords, no s_wqm expected.
define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
; GFX9-LABEL: gather4_c_l_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v5, v3
; GFX9-NEXT:    v_mov_b32_e32 v3, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
; GFX9-NEXT:    image_gather4_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_l_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; LOD-zero gather: only the packed s/t register is used as the address.
define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX9-LABEL: gather4_lz_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_lz_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Depth-compare + LOD-zero gather: float zcompare followed by packed s/t.
define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: gather4_c_lz_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_lz_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Intrinsic declarations. The name suffix spells the overloaded types: the
; result type first, then the bias type for the _b variants, then the
; coordinate type. The zcompare operand is declared as float in every
; _c variant here and contributes no suffix component.
declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Only attribute group #1 is referenced in this file; #0 and #2 are kept as-is.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }