; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; GlobalISel selection test for AMDGPU image sample intrinsics whose gradient
; operands are `half` while the coordinate operands are `float` (the
; `.f16.f32` overloads), compiled for gfx1010.
;
; What the expected output below shows:
;   * every intrinsic is selected to the corresponding `_g16` instruction;
;   * two half gradients are packed into one 32-bit VGPR with
;     `v_and_or_b32 dst, lo, 0xffff, hi << 16`;
;   * in the 1D and 3D cases the number of gradients per direction is odd, and
;     the high half comes from `s_lshl_b32 s12, s0, 16`.
;     NOTE(review): this looks like padding with an undefined value that
;     happened to be assigned s0 - confirm before relying on it.
;
; The check lines are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; --- sample.d: gradients + coordinates --------------------------------------

define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, s12
; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s12
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    v_and_or_b32 v7, v0, v6, v1
; GFX10-NEXT:    v_and_or_b32 v2, v2, v6, v3
; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v7, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
; GFX10-LABEL: sample_d_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v11, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v0, v0, v11, v1
; GFX10-NEXT:    v_and_or_b32 v1, v2, v11, s12
; GFX10-NEXT:    v_and_or_b32 v2, v3, v11, v4
; GFX10-NEXT:    v_and_or_b32 v3, v5, v11, s12
; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; --- sample.c.d: leading float %zcompare operand ----------------------------

define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_c_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
; GFX10-NEXT:    v_and_or_b32 v2, v2, v4, s12
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_c_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    v_and_or_b32 v11, v1, v7, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v7, v4
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v11, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; --- sample.d.cl: trailing float %clamp operand -----------------------------

define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, s12
; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, s12
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 16, v3
; GFX10-NEXT:    v_and_or_b32 v11, v0, v7, v1
; GFX10-NEXT:    v_and_or_b32 v1, v2, v7, v9
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v11, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; --- sample.c.d.cl: %zcompare and %clamp together ---------------------------

define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, s12
; GFX10-NEXT:    v_and_or_b32 v2, v2, v7, s12
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v10, 16, v4
; GFX10-NEXT:    v_and_or_b32 v1, v1, v8, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v8, v10
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; --- sample.cd family: same operand layouts as the sample.d tests above -----

define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, s12
; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s12
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    v_and_or_b32 v7, v0, v6, v1
; GFX10-NEXT:    v_and_or_b32 v2, v2, v6, v3
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], [v7, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_c_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
; GFX10-NEXT:    v_and_or_b32 v2, v2, v4, s12
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_c_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    v_and_or_b32 v11, v1, v7, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v7, v4
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v11, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, s12
; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, s12
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 16, v3
; GFX10-NEXT:    v_and_or_b32 v11, v0, v7, v1
; GFX10-NEXT:    v_and_or_b32 v1, v2, v7, v9
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v11, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_c_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, s12
; GFX10-NEXT:    v_and_or_b32 v2, v2, v7, s12
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_c_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v10, 16, v4
; GFX10-NEXT:    v_and_or_b32 v1, v1, v8, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v8, v10
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; --- sample.c.d.o on a 2D array with narrower results -----------------------
; These two tests request fewer result channels (dmask 4 -> one float,
; dmask 6 -> two floats) and add a leading i32 %offset operand.
;
; The overload suffix lists the result, gradient and coordinate types in that
; order, so the float-returning variant is spelled `.f32.f16.f32`, following
; the same pattern as the `.v4f32.f16.f32` / `.v2f32.f16.f32` names in this file.

define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_c_d_o_2darray_V1:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    v_lshlrev_b32_e32 v11, 16, v5
; GFX10-NEXT:    v_and_or_b32 v2, v2, v9, v3
; GFX10-NEXT:    v_and_or_b32 v3, v4, v9, v11
; GFX10-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}

define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_c_d_o_2darray_V2:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    v_lshlrev_b32_e32 v11, 16, v5
; GFX10-NEXT:    v_and_or_b32 v2, v2, v9, v3
; GFX10-NEXT:    v_and_or_b32 v3, v4, v9, v11
; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <2 x float> %v
}

; Intrinsic declarations, in the same order as the tests above.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Only attribute group #1 is referenced by the declarations in this file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }