; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; Codegen test for the llvm.amdgcn.image.sample.* intrinsics when the
; coordinate operands (and, for the .d variants, the derivative operands) are
; passed as `half`. The file is compiled once for gfx900 and once for gfx1010,
; and every expected image_sample* instruction carries the a16 modifier.
; The assertion lines are machine-generated (see the first line of the file):
; regenerate them with that script instead of editing them by hand.

; 1D sample: the single half coordinate is consumed directly from v0.
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
; GFX9-LABEL: sample_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample: %s and %t are expected to be packed into v0 (mask low 16 bits,
; then v_lshl_or_b32 places v1 in the high half).
define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX9-LABEL: sample_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 3D sample: the first two coordinates are packed into v1 and the address
; operand is the register pair v[1:2].
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
; GFX9-LABEL: sample_3d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_3d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Cube sample: same packing as the 3D case; the gfx900 expectation adds the
; `da` modifier and the gfx1010 one uses dim:SQ_RSRC_IMG_CUBE.
define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
; GFX9-LABEL: sample_cube:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cube:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D-array sample: coordinate and slice are packed into v0; the gfx900
; expectation adds `da`, the gfx1010 one uses dim:SQ_RSRC_IMG_1D_ARRAY.
define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
; GFX9-LABEL: sample_1darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D-array sample: %s/%t are packed into v1 and the address is v[1:2]; the
; gfx900 expectation adds `da`, the gfx1010 one uses dim:SQ_RSRC_IMG_2D_ARRAY.
define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
; GFX9-LABEL: sample_2darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_2darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Compare sample, 1D: float %zcompare stays in v0 and the half coordinate in
; v1; no packing instructions are expected.
define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
; GFX9-LABEL: sample_c_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Compare sample, 2D: %s/%t are packed into v1 behind the float compare value
; in v0.
define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: sample_c_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Clamped sample, 1D: %s and %clamp are packed together into v0.
define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
; GFX9-LABEL: sample_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Clamped sample, 2D: %s/%t are packed into v1 and the address is v[1:2].
define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Compare + clamp, 1D: %s and %clamp are packed into v1 behind %zcompare in v0.
define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
; GFX9-LABEL: sample_c_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Compare + clamp, 2D: the gfx900 expectation copies operands into the
; contiguous range v[3:5]; the gfx1010 expectation uses the bracketed register
; list [v0, v1, v3] and needs no copies.
define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mov_b32_e32 v3, v0
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v1
; GFX9-NEXT: v_lshl_or_b32 v4, v2, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Biased sample, 1D: float %bias in v0, half coordinate in v1, no packing.
define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
; GFX9-LABEL: sample_b_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Biased sample, 2D: %s/%t are packed into v1 behind the float bias in v0.
define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
; GFX9-LABEL: sample_b_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Compare + bias, 1D: two float operands plus one half coordinate occupy
; v[0:2]; no packing is expected.
define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
; GFX9-LABEL: sample_c_b_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Compare + bias, 2D: %s/%t are packed into v2 behind the two float operands.
define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
; GFX9-LABEL: sample_c_b_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Bias + clamp, 1D: %s and %clamp are packed into v1 behind the float bias.
define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
; GFX9-LABEL: sample_b_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Bias + clamp, 2D: gfx900 is expected to copy operands into v[3:5], while
; gfx1010 addresses them through the register list [v0, v1, v3].
define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_b_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mov_b32_e32 v3, v0
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v1
; GFX9-NEXT: v_lshl_or_b32 v4, v2, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Compare + bias + clamp, 1D: %s and %clamp are packed into v2 behind the two
; float operands.
define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
; GFX9-LABEL: sample_c_b_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Compare + bias + clamp, 2D: gfx900 is expected to copy operands into v[4:7],
; while gfx1010 uses the register list [v0, v1, v2, v4].
define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_b_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_mov_b32_e32 v7, v4
; GFX9-NEXT: v_mov_b32_e32 v4, v0
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v2
; GFX9-NEXT: v_mov_b32_e32 v5, v1
; GFX9-NEXT: v_lshl_or_b32 v6, v3, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Derivative sample, 1D: both derivatives and the coordinate are half and are
; used unpacked from v[0:2]. Unlike the functions above, the expected output
; contains no s_wqm / exec save-restore sequence.
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_d_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Derivative sample, 2D: the six half operands are packed pairwise into three
; VGPRs (v[2:4]); the 0xffff mask is materialised once in a scratch register.
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_d_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: v_mov_b32_e32 v6, 0xffff
; GFX9-NEXT: v_and_b32_e32 v4, v6, v4
; GFX9-NEXT: v_and_b32_e32 v2, v6, v2
; GFX9-NEXT: v_and_b32_e32 v0, v6, v0
; GFX9-NEXT: v_lshl_or_b32 v3, v3, 16, v2
; GFX9-NEXT: v_lshl_or_b32 v4, v5, 16, v4
; GFX9-NEXT: v_lshl_or_b32 v2, v1, 16, v0
; GFX9-NEXT: image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT: v_and_b32_e32 v4, v7, v4
; GFX10-NEXT: v_and_b32_e32 v2, v7, v2
; GFX10-NEXT: v_and_b32_e32 v0, v7, v0
; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2
; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0
; GFX10-NEXT: image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Derivative sample, 3D: nine half operands. The gfx900 expectation uses the
; contiguous range v[7:14]; the gfx1010 expectation uses a six-register list.
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
; GFX9-LABEL: sample_d_3d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: v_mov_b32_e32 v12, v8
; GFX9-NEXT: v_mov_b32_e32 v8, v2
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: v_mov_b32_e32 v10, v5
; GFX9-NEXT: v_and_b32_e32 v5, v2, v6
; GFX9-NEXT: v_and_b32_e32 v3, v2, v3
; GFX9-NEXT: v_and_b32_e32 v0, v2, v0
; GFX9-NEXT: v_lshl_or_b32 v11, v7, 16, v5
; GFX9-NEXT: v_lshl_or_b32 v9, v4, 16, v3
; GFX9-NEXT: v_lshl_or_b32 v7, v1, 16, v0
; GFX9-NEXT: image_sample_d v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_3d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
; GFX10-NEXT: v_and_b32_e32 v6, v9, v6
; GFX10-NEXT: v_and_b32_e32 v3, v9, v3
; GFX10-NEXT: v_and_b32_e32 v0, v9, v0
; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT: image_sample_d v[0:3], [v0, v2, v3, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Compare + derivative, 1D: float %zcompare followed by three unpacked half
; operands, addressed as v[0:3].
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_c_d_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Compare + derivative, 2D: the six half operands are packed pairwise behind
; %zcompare; gfx900 ends up with v[0:3], gfx1010 with [v0, v2, v3, v5].
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_c_d_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: v_mov_b32_e32 v9, 0xffff
; GFX9-NEXT: v_mov_b32_e32 v8, v2
; GFX9-NEXT: v_mov_b32_e32 v7, v3
; GFX9-NEXT: v_and_b32_e32 v2, v9, v5
; GFX9-NEXT: v_and_b32_e32 v1, v9, v1
; GFX9-NEXT: v_lshl_or_b32 v3, v6, 16, v2
; GFX9-NEXT: v_and_b32_e32 v2, v9, v7
; GFX9-NEXT: v_lshl_or_b32 v2, v4, 16, v2
; GFX9-NEXT: v_lshl_or_b32 v1, v8, 16, v1
; GFX9-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v10, 0xffff
; GFX10-NEXT: v_and_b32_e32 v5, v10, v5
; GFX10-NEXT: v_and_b32_e32 v3, v10, v3
; GFX10-NEXT: v_and_b32_e32 v1, v10, v1
; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v1
; GFX10-NEXT: image_sample_c_d v[0:3], [v0, v2, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Derivative + clamp, 1D: only %s and %clamp are packed (into v2); the two
; derivatives stay in v0 and v1.
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_d_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Derivative + clamp, 2D: three packed pairs plus the unpaired clamp in v6;
; gfx900 addresses v[3:6], gfx1010 the list [v3, v5, v4, v6].
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_d_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: v_mov_b32_e32 v7, 0xffff
; GFX9-NEXT: v_and_b32_e32 v4, v7, v4
; GFX9-NEXT: v_and_b32_e32 v2, v7, v2
; GFX9-NEXT: v_and_b32_e32 v0, v7, v0
; GFX9-NEXT: v_lshl_or_b32 v5, v5, 16, v4
; GFX9-NEXT: v_lshl_or_b32 v4, v3, 16, v2
; GFX9-NEXT: v_lshl_or_b32 v3, v1, 16, v0
; GFX9-NEXT: image_sample_d_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT: v_and_b32_e32 v4, v7, v4
; GFX10-NEXT: v_and_b32_e32 v2, v7, v2
; GFX10-NEXT: v_and_b32_e32 v0, v7, v0
; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
; GFX10-NEXT: v_lshl_or_b32 v5, v3, 16, v2
; GFX10-NEXT: v_lshl_or_b32 v3, v1, 16, v0
; GFX10-NEXT: image_sample_d_cl v[0:3], [v3, v5, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Compare + derivative + clamp, 1D: %s and %clamp are packed into v3; the
; compare value and both derivatives stay in v0..v2.
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_c_d_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX9-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GFX9-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_d_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: v_mov_b32_e32 v11, v7
; GFX9-NEXT: v_mov_b32_e32 v7, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff
; GFX9-NEXT: v_and_b32_e32 v5, v0, v5
; GFX9-NEXT: v_and_b32_e32 v3, v0, v3
; GFX9-NEXT: v_and_b32_e32 v0, v0, v1
; GFX9-NEXT: v_lshl_or_b32 v10, v6, 16, v5
; GFX9-NEXT: v_lshl_or_b32 v9, v4, 16, v3
; GFX9-NEXT: v_lshl_or_b32 v8, v2, 16, v0
; GFX9-NEXT: image_sample_c_d_cl v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32
v8, 0xffff 760; GFX10-NEXT: v_and_b32_e32 v5, v8, v5 761; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 762; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 763; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 764; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 765; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v1 766; GFX10-NEXT: image_sample_c_d_cl v[0:3], [v0, v2, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 767; GFX10-NEXT: s_waitcnt vmcnt(0) 768; GFX10-NEXT: ; return to shader part epilog 769main_body: 770 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 771 ret <4 x float> %v 772} 773 774define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { 775; GFX9-LABEL: sample_cd_1d: 776; GFX9: ; %bb.0: ; %main_body 777; GFX9-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 778; GFX9-NEXT: s_waitcnt vmcnt(0) 779; GFX9-NEXT: ; return to shader part epilog 780; 781; GFX10-LABEL: sample_cd_1d: 782; GFX10: ; %bb.0: ; %main_body 783; GFX10-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 784; GFX10-NEXT: s_waitcnt vmcnt(0) 785; GFX10-NEXT: ; return to shader part epilog 786main_body: 787 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 788 ret <4 x float> %v 789} 790 791define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { 792; GFX9-LABEL: sample_cd_2d: 793; GFX9: ; %bb.0: ; %main_body 794; GFX9-NEXT: v_mov_b32_e32 v6, 0xffff 795; GFX9-NEXT: v_and_b32_e32 v4, v6, v4 796; GFX9-NEXT: v_and_b32_e32 v2, v6, v2 797; GFX9-NEXT: v_and_b32_e32 v0, v6, v0 798; GFX9-NEXT: v_lshl_or_b32 v3, v3, 
16, v2 799; GFX9-NEXT: v_lshl_or_b32 v4, v5, 16, v4 800; GFX9-NEXT: v_lshl_or_b32 v2, v1, 16, v0 801; GFX9-NEXT: image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16 802; GFX9-NEXT: s_waitcnt vmcnt(0) 803; GFX9-NEXT: ; return to shader part epilog 804; 805; GFX10-LABEL: sample_cd_2d: 806; GFX10: ; %bb.0: ; %main_body 807; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff 808; GFX10-NEXT: v_and_b32_e32 v4, v7, v4 809; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 810; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 811; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 812; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 813; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0 814; GFX10-NEXT: image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 815; GFX10-NEXT: s_waitcnt vmcnt(0) 816; GFX10-NEXT: ; return to shader part epilog 817main_body: 818 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 819 ret <4 x float> %v 820} 821 822define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { 823; GFX9-LABEL: sample_c_cd_1d: 824; GFX9: ; %bb.0: ; %main_body 825; GFX9-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 826; GFX9-NEXT: s_waitcnt vmcnt(0) 827; GFX9-NEXT: ; return to shader part epilog 828; 829; GFX10-LABEL: sample_c_cd_1d: 830; GFX10: ; %bb.0: ; %main_body 831; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 832; GFX10-NEXT: s_waitcnt vmcnt(0) 833; GFX10-NEXT: ; return to shader part epilog 834main_body: 835 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 836 ret <4 x float> %v 837} 838 839define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 
x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { 840; GFX9-LABEL: sample_c_cd_2d: 841; GFX9: ; %bb.0: ; %main_body 842; GFX9-NEXT: v_mov_b32_e32 v9, 0xffff 843; GFX9-NEXT: v_mov_b32_e32 v8, v2 844; GFX9-NEXT: v_mov_b32_e32 v7, v3 845; GFX9-NEXT: v_and_b32_e32 v2, v9, v5 846; GFX9-NEXT: v_and_b32_e32 v1, v9, v1 847; GFX9-NEXT: v_lshl_or_b32 v3, v6, 16, v2 848; GFX9-NEXT: v_and_b32_e32 v2, v9, v7 849; GFX9-NEXT: v_lshl_or_b32 v2, v4, 16, v2 850; GFX9-NEXT: v_lshl_or_b32 v1, v8, 16, v1 851; GFX9-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 852; GFX9-NEXT: s_waitcnt vmcnt(0) 853; GFX9-NEXT: ; return to shader part epilog 854; 855; GFX10-LABEL: sample_c_cd_2d: 856; GFX10: ; %bb.0: ; %main_body 857; GFX10-NEXT: v_mov_b32_e32 v10, 0xffff 858; GFX10-NEXT: v_and_b32_e32 v5, v10, v5 859; GFX10-NEXT: v_and_b32_e32 v3, v10, v3 860; GFX10-NEXT: v_and_b32_e32 v1, v10, v1 861; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 862; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 863; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v1 864; GFX10-NEXT: image_sample_c_cd v[0:3], [v0, v2, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 865; GFX10-NEXT: s_waitcnt vmcnt(0) 866; GFX10-NEXT: ; return to shader part epilog 867main_body: 868 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 869 ret <4 x float> %v 870} 871 872define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { 873; GFX9-LABEL: sample_cd_cl_1d: 874; GFX9: ; %bb.0: ; %main_body 875; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2 876; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2 877; GFX9-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 878; GFX9-NEXT: s_waitcnt vmcnt(0) 879; GFX9-NEXT: ; return to shader 
part epilog 880; 881; GFX10-LABEL: sample_cd_cl_1d: 882; GFX10: ; %bb.0: ; %main_body 883; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 884; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 885; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 886; GFX10-NEXT: s_waitcnt vmcnt(0) 887; GFX10-NEXT: ; return to shader part epilog 888main_body: 889 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 890 ret <4 x float> %v 891} 892 893define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { 894; GFX9-LABEL: sample_cd_cl_2d: 895; GFX9: ; %bb.0: ; %main_body 896; GFX9-NEXT: v_mov_b32_e32 v7, 0xffff 897; GFX9-NEXT: v_and_b32_e32 v4, v7, v4 898; GFX9-NEXT: v_and_b32_e32 v2, v7, v2 899; GFX9-NEXT: v_and_b32_e32 v0, v7, v0 900; GFX9-NEXT: v_lshl_or_b32 v5, v5, 16, v4 901; GFX9-NEXT: v_lshl_or_b32 v4, v3, 16, v2 902; GFX9-NEXT: v_lshl_or_b32 v3, v1, 16, v0 903; GFX9-NEXT: image_sample_cd_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16 904; GFX9-NEXT: s_waitcnt vmcnt(0) 905; GFX9-NEXT: ; return to shader part epilog 906; 907; GFX10-LABEL: sample_cd_cl_2d: 908; GFX10: ; %bb.0: ; %main_body 909; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff 910; GFX10-NEXT: v_and_b32_e32 v4, v7, v4 911; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 912; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 913; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 914; GFX10-NEXT: v_lshl_or_b32 v5, v3, 16, v2 915; GFX10-NEXT: v_lshl_or_b32 v3, v1, 16, v0 916; GFX10-NEXT: image_sample_cd_cl v[0:3], [v3, v5, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 917; GFX10-NEXT: s_waitcnt vmcnt(0) 918; GFX10-NEXT: ; return to shader part epilog 919main_body: 920 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half 
%dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 921 ret <4 x float> %v 922} 923 924define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { 925; GFX9-LABEL: sample_c_cd_cl_1d: 926; GFX9: ; %bb.0: ; %main_body 927; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v3 928; GFX9-NEXT: v_lshl_or_b32 v3, v4, 16, v3 929; GFX9-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 930; GFX9-NEXT: s_waitcnt vmcnt(0) 931; GFX9-NEXT: ; return to shader part epilog 932; 933; GFX10-LABEL: sample_c_cd_cl_1d: 934; GFX10: ; %bb.0: ; %main_body 935; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 936; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 937; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 938; GFX10-NEXT: s_waitcnt vmcnt(0) 939; GFX10-NEXT: ; return to shader part epilog 940main_body: 941 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 942 ret <4 x float> %v 943} 944 945define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { 946; GFX9-LABEL: sample_c_cd_cl_2d: 947; GFX9: ; %bb.0: ; %main_body 948; GFX9-NEXT: v_mov_b32_e32 v11, v7 949; GFX9-NEXT: v_mov_b32_e32 v7, v0 950; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff 951; GFX9-NEXT: v_and_b32_e32 v5, v0, v5 952; GFX9-NEXT: v_and_b32_e32 v3, v0, v3 953; GFX9-NEXT: v_and_b32_e32 v0, v0, v1 954; GFX9-NEXT: v_lshl_or_b32 v10, v6, 16, v5 955; GFX9-NEXT: v_lshl_or_b32 v9, v4, 16, v3 956; GFX9-NEXT: v_lshl_or_b32 v8, v2, 16, v0 957; GFX9-NEXT: image_sample_c_cd_cl v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16 958; GFX9-NEXT: s_waitcnt vmcnt(0) 959; GFX9-NEXT: 
; return to shader part epilog 960; 961; GFX10-LABEL: sample_c_cd_cl_2d: 962; GFX10: ; %bb.0: ; %main_body 963; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff 964; GFX10-NEXT: v_and_b32_e32 v5, v8, v5 965; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 966; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 967; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 968; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 969; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v1 970; GFX10-NEXT: image_sample_c_cd_cl v[0:3], [v0, v2, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 971; GFX10-NEXT: s_waitcnt vmcnt(0) 972; GFX10-NEXT: ; return to shader part epilog 973main_body: 974 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 975 ret <4 x float> %v 976} 977 978define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) { 979; GFX9-LABEL: sample_l_1d: 980; GFX9: ; %bb.0: ; %main_body 981; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 982; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 983; GFX9-NEXT: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 984; GFX9-NEXT: s_waitcnt vmcnt(0) 985; GFX9-NEXT: ; return to shader part epilog 986; 987; GFX10-LABEL: sample_l_1d: 988; GFX10: ; %bb.0: ; %main_body 989; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 990; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 991; GFX10-NEXT: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 992; GFX10-NEXT: s_waitcnt vmcnt(0) 993; GFX10-NEXT: ; return to shader part epilog 994main_body: 995 %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 996 ret <4 x float> %v 997} 998 999define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) { 1000; GFX9-LABEL: 
sample_l_2d: 1001; GFX9: ; %bb.0: ; %main_body 1002; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 1003; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0 1004; GFX9-NEXT: image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 1005; GFX9-NEXT: s_waitcnt vmcnt(0) 1006; GFX9-NEXT: ; return to shader part epilog 1007; 1008; GFX10-LABEL: sample_l_2d: 1009; GFX10: ; %bb.0: ; %main_body 1010; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 1011; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0 1012; GFX10-NEXT: image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 1013; GFX10-NEXT: s_waitcnt vmcnt(0) 1014; GFX10-NEXT: ; return to shader part epilog 1015main_body: 1016 %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1017 ret <4 x float> %v 1018} 1019 1020define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) { 1021; GFX9-LABEL: sample_c_l_1d: 1022; GFX9: ; %bb.0: ; %main_body 1023; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 1024; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1 1025; GFX9-NEXT: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 1026; GFX9-NEXT: s_waitcnt vmcnt(0) 1027; GFX9-NEXT: ; return to shader part epilog 1028; 1029; GFX10-LABEL: sample_c_l_1d: 1030; GFX10: ; %bb.0: ; %main_body 1031; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 1032; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 1033; GFX10-NEXT: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 1034; GFX10-NEXT: s_waitcnt vmcnt(0) 1035; GFX10-NEXT: ; return to shader part epilog 1036main_body: 1037 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1038 ret <4 x float> %v 1039} 1040 1041define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float 
%zcompare, half %s, half %t, half %lod) { 1042; GFX9-LABEL: sample_c_l_2d: 1043; GFX9: ; %bb.0: ; %main_body 1044; GFX9-NEXT: v_mov_b32_e32 v5, v3 1045; GFX9-NEXT: v_mov_b32_e32 v3, v0 1046; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v1 1047; GFX9-NEXT: v_lshl_or_b32 v4, v2, 16, v0 1048; GFX9-NEXT: image_sample_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16 1049; GFX9-NEXT: s_waitcnt vmcnt(0) 1050; GFX9-NEXT: ; return to shader part epilog 1051; 1052; GFX10-LABEL: sample_c_l_2d: 1053; GFX10: ; %bb.0: ; %main_body 1054; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 1055; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 1056; GFX10-NEXT: image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 1057; GFX10-NEXT: s_waitcnt vmcnt(0) 1058; GFX10-NEXT: ; return to shader part epilog 1059main_body: 1060 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1061 ret <4 x float> %v 1062} 1063 1064define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { 1065; GFX9-LABEL: sample_lz_1d: 1066; GFX9: ; %bb.0: ; %main_body 1067; GFX9-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 1068; GFX9-NEXT: s_waitcnt vmcnt(0) 1069; GFX9-NEXT: ; return to shader part epilog 1070; 1071; GFX10-LABEL: sample_lz_1d: 1072; GFX10: ; %bb.0: ; %main_body 1073; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 1074; GFX10-NEXT: s_waitcnt vmcnt(0) 1075; GFX10-NEXT: ; return to shader part epilog 1076main_body: 1077 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1078 ret <4 x float> %v 1079} 1080 1081define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { 1082; GFX9-LABEL: sample_lz_2d: 1083; GFX9: ; %bb.0: ; %main_body 1084; GFX9-NEXT: 
v_and_b32_e32 v0, 0xffff, v0 1085; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1086; GFX9-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 1087; GFX9-NEXT: s_waitcnt vmcnt(0) 1088; GFX9-NEXT: ; return to shader part epilog 1089; 1090; GFX10-LABEL: sample_lz_2d: 1091; GFX10: ; %bb.0: ; %main_body 1092; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 1093; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1094; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 1095; GFX10-NEXT: s_waitcnt vmcnt(0) 1096; GFX10-NEXT: ; return to shader part epilog 1097main_body: 1098 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1099 ret <4 x float> %v 1100} 1101 1102define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { 1103; GFX9-LABEL: sample_c_lz_1d: 1104; GFX9: ; %bb.0: ; %main_body 1105; GFX9-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 1106; GFX9-NEXT: s_waitcnt vmcnt(0) 1107; GFX9-NEXT: ; return to shader part epilog 1108; 1109; GFX10-LABEL: sample_c_lz_1d: 1110; GFX10: ; %bb.0: ; %main_body 1111; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 1112; GFX10-NEXT: s_waitcnt vmcnt(0) 1113; GFX10-NEXT: ; return to shader part epilog 1114main_body: 1115 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1116 ret <4 x float> %v 1117} 1118 1119define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { 1120; GFX9-LABEL: sample_c_lz_2d: 1121; GFX9: ; %bb.0: ; %main_body 1122; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 1123; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1 1124; GFX9-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 1125; GFX9-NEXT: 
s_waitcnt vmcnt(0) 1126; GFX9-NEXT: ; return to shader part epilog 1127; 1128; GFX10-LABEL: sample_c_lz_2d: 1129; GFX10: ; %bb.0: ; %main_body 1130; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 1131; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 1132; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 1133; GFX10-NEXT: s_waitcnt vmcnt(0) 1134; GFX10-NEXT: ; return to shader part epilog 1135main_body: 1136 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1137 ret <4 x float> %v 1138} 1139 1140define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { 1141; GFX9-LABEL: sample_c_d_o_2darray_V1: 1142; GFX9: ; %bb.0: ; %main_body 1143; GFX9-NEXT: v_mov_b32_e32 v13, v8 1144; GFX9-NEXT: v_mov_b32_e32 v8, v0 1145; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff 1146; GFX9-NEXT: v_mov_b32_e32 v9, v1 1147; GFX9-NEXT: v_and_b32_e32 v1, v0, v6 1148; GFX9-NEXT: v_lshl_or_b32 v12, v7, 16, v1 1149; GFX9-NEXT: v_and_b32_e32 v1, v0, v4 1150; GFX9-NEXT: v_and_b32_e32 v0, v0, v2 1151; GFX9-NEXT: v_lshl_or_b32 v11, v5, 16, v1 1152; GFX9-NEXT: v_lshl_or_b32 v10, v3, 16, v0 1153; GFX9-NEXT: image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 a16 da 1154; GFX9-NEXT: s_waitcnt vmcnt(0) 1155; GFX9-NEXT: ; return to shader part epilog 1156; 1157; GFX10-LABEL: sample_c_d_o_2darray_V1: 1158; GFX10: ; %bb.0: ; %main_body 1159; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff 1160; GFX10-NEXT: v_and_b32_e32 v6, v9, v6 1161; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 1162; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 1163; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 1164; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 1165; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 1166; GFX10-NEXT: image_sample_c_d_o v0, [v0, v1, v3, v4, v6, v8], s[0:7], s[8:11] 
dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 1167; GFX10-NEXT: s_waitcnt vmcnt(0) 1168; GFX10-NEXT: ; return to shader part epilog 1169main_body: 1170 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1171 ret float %v 1172} 1173 1174define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { 1175; GFX9-LABEL: sample_c_d_o_2darray_V2: 1176; GFX9: ; %bb.0: ; %main_body 1177; GFX9-NEXT: v_mov_b32_e32 v13, v8 1178; GFX9-NEXT: v_mov_b32_e32 v8, v0 1179; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff 1180; GFX9-NEXT: v_mov_b32_e32 v9, v1 1181; GFX9-NEXT: v_and_b32_e32 v1, v0, v6 1182; GFX9-NEXT: v_lshl_or_b32 v12, v7, 16, v1 1183; GFX9-NEXT: v_and_b32_e32 v1, v0, v4 1184; GFX9-NEXT: v_and_b32_e32 v0, v0, v2 1185; GFX9-NEXT: v_lshl_or_b32 v11, v5, 16, v1 1186; GFX9-NEXT: v_lshl_or_b32 v10, v3, 16, v0 1187; GFX9-NEXT: image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 a16 da 1188; GFX9-NEXT: s_waitcnt vmcnt(0) 1189; GFX9-NEXT: ; return to shader part epilog 1190; 1191; GFX10-LABEL: sample_c_d_o_2darray_V2: 1192; GFX10: ; %bb.0: ; %main_body 1193; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff 1194; GFX10-NEXT: v_and_b32_e32 v6, v9, v6 1195; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 1196; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 1197; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 1198; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 1199; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 1200; GFX10-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v3, v4, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 1201; GFX10-NEXT: s_waitcnt vmcnt(0) 1202; GFX10-NEXT: ; return to shader part epilog 1203main_body: 1204 %v = call <2 x float> 
@llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1205 ret <2 x float> %v 1206} 1207 1208declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1209declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1210declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1211declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1212declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1213declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1214declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1215 1216declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1217declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1218declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1219declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1220declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1221declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1222 1223declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32, float, half, <8 x i32>, <4 x i32>, 
i1, i32, i32) #1 1224declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1225declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1226declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1227declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1228declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1229declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1230declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1231 1232declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1233declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1234declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1235declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1236declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1237declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1238declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, 
half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1239declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1240declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1241 1242declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1243declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1244declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1245declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1246declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1247declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1248declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1249declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1250 1251declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1252declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1253declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1254declare <4 x float> 
@llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1255 1256declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1257declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1258declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1259declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1260 1261declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1262declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1263 1264attributes #0 = { nounwind } 1265attributes #1 = { nounwind readonly } 1266attributes #2 = { nounwind readnone } 1267