; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10NSA %s

; Overview: every function below is an amdgpu_ps shader that issues exactly one
; llvm.amdgcn.image.gather4* intrinsic call and returns its <4 x float> result.
; The resource (%rsrc) and sampler (%samp) descriptors are passed inreg; the
; remaining float arguments are forwarded to the intrinsic in the order shown
; in each call. The check lines pin the expected llc output for the two run
; configurations above (tahiti and gfx1010). Since the assertions are
; autogenerated, regenerate them with the script named in the NOTE line rather
; than editing them by hand.

; Plain gather4 on a 2D image: coordinates %s, %t and dmask immediate 1.
define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX6-LABEL: gather4_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; Cube variant: adds a third coordinate %face. The expected tahiti output
; carries the "da" modifier; the gfx1010 output uses dim SQ_RSRC_IMG_CUBE.
define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
; GFX6-LABEL: gather4_cube:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_cube:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 1, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D-array variant: adds a third coordinate %slice. The expected tahiti output
; carries the "da" modifier; the gfx1010 output uses dim SQ_RSRC_IMG_2D_ARRAY.
define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
; GFX6-LABEL: gather4_2darray:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_2darray:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 1, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; "c" variant: %zcompare is passed ahead of the coordinates; expected
; instruction is image_gather4_c.
define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; "cl" variant: %clamp is passed after the coordinates; expected instruction
; is image_gather4_cl.
define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_cl_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_cl_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 1, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; "c.cl" variant: %zcompare first, %clamp last; expected instruction is
; image_gather4_c_cl with a four-register address range.
define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_c_cl_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_cl_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; "b" variant: %bias is passed ahead of the coordinates; expected instruction
; is image_gather4_b. Note the intrinsic name has an extra ".f32" suffix.
define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
; GFX6-LABEL: gather4_b_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_b_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; "c.b" variant: %bias, then %zcompare, then the coordinates; expected
; instruction is image_gather4_c_b with a four-register address range.
define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_b_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_b_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; "b.cl" variant: %bias first, %clamp last; expected instruction is
; image_gather4_b_cl with a four-register address range.
define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_b_cl_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_b_cl_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; "c.b.cl" variant: five float operands (%bias, %zcompare, %s, %t, %clamp).
; The expected address operand is the eight-register range v[0:7] even though
; only five values are passed.
define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_c_b_cl_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_b_cl_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; "l" variant: %lod is passed after the coordinates. Unlike the functions
; above, the expected output has no s_wqm / exec save-and-restore sequence.
define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
; GFX6-LABEL: gather4_l_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_l_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 1, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; "c.l" variant: %zcompare first, %lod last; expected instruction is
; image_gather4_c_l, again without any s_wqm sequence.
define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
; GFX6-LABEL: gather4_c_l_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_l_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; "lz" variant: only the two coordinates are passed; expected instruction is
; image_gather4_lz, without any s_wqm sequence.
define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX6-LABEL: gather4_lz_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_lz_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; "c.lz" variant: %zcompare ahead of the coordinates; expected instruction is
; image_gather4_c_lz, without any s_wqm sequence.
define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_lz_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_lz_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; Same call as gather4_2d but with dmask immediate 2; the expected
; instruction carries dmask 0x2.
define amdgpu_ps <4 x float> @gather4_2d_dmask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX6-LABEL: gather4_2d_dmask_2:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_2d_dmask_2:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 2, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; Same call as gather4_2d but with dmask immediate 4; the expected
; instruction carries dmask 0x4.
define amdgpu_ps <4 x float> @gather4_2d_dmask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX6-LABEL: gather4_2d_dmask_4:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_2d_dmask_4:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 4, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; Same call as gather4_2d but with dmask immediate 8; the expected
; instruction carries dmask 0x8.
define amdgpu_ps <4 x float> @gather4_2d_dmask_8(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX6-LABEL: gather4_2d_dmask_8:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_2d_dmask_8:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 8, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; Intrinsic declarations used above. In every one, the leading i32 and the
; trailing i1/i32/i32 operands are immarg; the float operands come first,
; followed by the <8 x i32> resource and <4 x i32> sampler descriptors.
declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }