; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI %s
; RUN: llc -march=amdgcn -mcpu=kabini -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,16BANK %s
; RUN: llc -march=amdgcn -mcpu=stoney -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,16BANK %s

; Code generation checks for the llvm.amdgcn.interp.p1, llvm.amdgcn.interp.p2
; and llvm.amdgcn.interp.mov intrinsics. The file is compiled four times (verde,
; tonga, kabini, stoney); the kabini and stoney runs additionally enable the
; 16BANK check prefix used by the last test in this file.
;
; NOTE(review): the trailing i32 operand of every interp call appears to be the
; value expected in m0 (the constant 256 used below lines up with the expected
; "s_movk_i32 m0, 0x100") - confirm against the intrinsic definition.

; v_interp: runs p1 followed by p2 for the second operand values 0 and 1 with
; third operand 0, plus one interp.mov whose first operand is 2, all with the
; inreg argument %arg3 as the trailing operand. The checks expect m0 to be
; written from an SGPR with no s_wqm before that write, followed by the interp
; instructions in any order.
; GCN-LABEL: {{^}}v_interp:
; GCN-NOT: s_wqm
; GCN: s_mov_b32 m0, s{{[0-9]+}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p0, attr0.x{{$}}
define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x float> %arg4) #0 {
main_body:
  %i = extractelement <2 x float> %arg4, i32 0
  %j = extractelement <2 x float> %arg4, i32 1
  %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %arg3)
  %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %arg3)
  %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %arg3)
  %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %arg3)
  %const = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %arg3)
  %w = fadd float %p1_1, %const
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_0, float %p1_1, float %w, i1 true, i1 true) #0
  ret void
}

; v_interp_p1: sweeps the operands of interp.p1 with a constant trailing operand
; of 256. The first group of calls varies the second operand over 0..4 with the
; third operand fixed at 0; the second group varies the third operand over
; 1, 2, 3, 4, 63 and 64. Per the expected output, second operand values 0..3
; print as the suffixes .x/.y/.z/.w, the third operand prints as the number
; after "attr", and the call with operands (4, 64) is expected to print
; attr64.x.
; GCN-LABEL: {{^}}v_interp_p1:
; GCN: s_movk_i32 m0, 0x100
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.z{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.w{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}

; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr1.x{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr2.y{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr3.z{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr4.w{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr63.w{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr64.w{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
define amdgpu_ps void @v_interp_p1(float %i) #0 {
bb:
  %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 256)
  %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 256)
  %p0_2 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 0, i32 256)
  %p0_3 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 0, i32 256)
  %p0_4 = call float @llvm.amdgcn.interp.p1(float %i, i32 4, i32 0, i32 256)
  %p0_5 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 1, i32 256)
  %p0_6 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 2, i32 256)
  %p0_7 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 3, i32 256)
  %p0_8 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 4, i32 256)
  %p0_9 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 63, i32 256)
  %p0_10 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 64, i32 256)
  %p0_11 = call float @llvm.amdgcn.interp.p1(float %i, i32 4, i32 64, i32 256)

  ; Each result is stored with a volatile store to an undef address, presumably
  ; so that none of the calls above is removed as dead.
  store volatile float %p0_0, float addrspace(1)* undef
  store volatile float %p0_1, float addrspace(1)* undef
  store volatile float %p0_2, float addrspace(1)* undef
  store volatile float %p0_3, float addrspace(1)* undef
  store volatile float %p0_4, float addrspace(1)* undef
  store volatile float %p0_5, float addrspace(1)* undef
  store volatile float %p0_6, float addrspace(1)* undef
  store volatile float %p0_7, float addrspace(1)* undef
  store volatile float %p0_8, float addrspace(1)* undef
  store volatile float %p0_9, float addrspace(1)* undef
  store volatile float %p0_10, float addrspace(1)* undef
  store volatile float %p0_11, float addrspace(1)* undef
  ret void
}

; v_interp_p2: same style of operand sweep for interp.p2 with a constant
; trailing operand of 256: third operand 0..4 with fourth operand 0, then
; fourth operand 1, 63 and 64.
;
; NOTE(review): %p2_5 uses fourth operand 1, yet no check line below names
; attr1.x; the sixth DAG line repeats attr0.x instead. The run lines allow
; deprecated DAG overlap, so that line can match an instruction already matched
; by an earlier one - confirm whether attr1.x was intended.
; GCN-LABEL: {{^}}v_interp_p2:
; GCN: s_movk_i32 m0, 0x100
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.z{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.w{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr63.x{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
define amdgpu_ps void @v_interp_p2(float %x, float %j) #0 {
bb:
  %p2_0 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 0, i32 256)
  %p2_1 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 1, i32 0, i32 256)
  %p2_2 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 2, i32 0, i32 256)
  %p2_3 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 3, i32 0, i32 256)
  %p2_4 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 4, i32 0, i32 256)

  %p2_5 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 1, i32 256)
  %p2_6 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 63, i32 256)
  %p2_7 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 64, i32 256)
  %p2_8 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 4, i32 64, i32 256)

  ; Volatile stores to an undef address, presumably to keep every result live.
  store volatile float %p2_0, float addrspace(1)* undef
  store volatile float %p2_1, float addrspace(1)* undef
  store volatile float %p2_2, float addrspace(1)* undef
  store volatile float %p2_3, float addrspace(1)* undef
  store volatile float %p2_4, float addrspace(1)* undef
  store volatile float %p2_5, float addrspace(1)* undef
  store volatile float %p2_6, float addrspace(1)* undef
  store volatile float %p2_7, float addrspace(1)* undef
  store volatile float %p2_8, float addrspace(1)* undef
  ret void
}

; v_interp_mov: operand sweep for interp.mov with a constant trailing operand
; of 256. Per the expected output, first operand values 0, 1 and 2 print as
; p10, p20 and p0, while 3, 8 and 10 print as invalid_param_<value>. The second
; operand selects the .x/.y/.z/.w suffix and the third the attribute number.
;
; NOTE(review): %mov_4 uses second operand 1 with third operand 0, yet no check
; line below names "p10, attr0.y"; the first line of the second group repeats
; "p10, attr0.x". The run lines allow deprecated DAG overlap, so that line can
; match an already matched instruction - confirm whether attr0.y was intended.
; GCN-LABEL: {{^}}v_interp_mov:
; GCN: s_movk_i32 m0, 0x100
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p10, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p20, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p0, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, invalid_param_3, attr0.x{{$}}

; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p10, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p10, attr0.z{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p10, attr0.w{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p10, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, invalid_param_8, attr0.x{{$}}

; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p10, attr63.y{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p10, attr64.y{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, invalid_param_3, attr64.y{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, invalid_param_10, attr64.x{{$}}
define amdgpu_ps void @v_interp_mov(float %x, float %j) #0 {
bb:
  %mov_0 = call float @llvm.amdgcn.interp.mov(i32 0, i32 0, i32 0, i32 256)
  %mov_1 = call float @llvm.amdgcn.interp.mov(i32 1, i32 0, i32 0, i32 256)
  %mov_2 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 256)
  %mov_3 = call float @llvm.amdgcn.interp.mov(i32 3, i32 0, i32 0, i32 256)

  %mov_4 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 0, i32 256)
  %mov_5 = call float @llvm.amdgcn.interp.mov(i32 0, i32 2, i32 0, i32 256)
  %mov_6 = call float @llvm.amdgcn.interp.mov(i32 0, i32 3, i32 0, i32 256)
  %mov_7 = call float @llvm.amdgcn.interp.mov(i32 0, i32 4, i32 0, i32 256)
  %mov_8 = call float @llvm.amdgcn.interp.mov(i32 8, i32 4, i32 0, i32 256)

  %mov_9 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 63, i32 256)
  %mov_10 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 64, i32 256)
  %mov_11 = call float @llvm.amdgcn.interp.mov(i32 3, i32 1, i32 64, i32 256)
  %mov_12 = call float @llvm.amdgcn.interp.mov(i32 10, i32 4, i32 64, i32 256)

  ; Volatile stores to an undef address, presumably to keep every result live.
  store volatile float %mov_0, float addrspace(1)* undef
  store volatile float %mov_1, float addrspace(1)* undef
  store volatile float %mov_2, float addrspace(1)* undef
  store volatile float %mov_3, float addrspace(1)* undef

  store volatile float %mov_4, float addrspace(1)* undef
  store volatile float %mov_5, float addrspace(1)* undef
  store volatile float %mov_6, float addrspace(1)* undef
  store volatile float %mov_7, float addrspace(1)* undef
  store volatile float %mov_8, float addrspace(1)* undef

  store volatile float %mov_9, float addrspace(1)* undef
  store volatile float %mov_10, float addrspace(1)* undef
  store volatile float %mov_11, float addrspace(1)* undef
  store volatile float %mov_12, float addrspace(1)* undef
  ret void
}

; The test below is disabled: both its check lines (prefixed with TODO-) and
; its IR are commented out.
;
; SI won't merge ds memory operations, because of the signed offset bug, so
; we only have check lines for VI.
;
; TODO: VI won't merge them either, because we are conservative about moving
; instructions past changes to physregs.
;
; TODO-VI-LABEL: v_interp_readnone:
; TODO-VI: s_mov_b32 m0, 0
; TODO-VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; TODO-VI-DAG: v_interp_mov_f32_e32 v{{[0-9]+}}, p0, attr0.x{{$}}
; TODO-VI: s_mov_b32 m0, -1{{$}}
; TODO-VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
;define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) #0 {
;bb:
;  store float 0.000000e+00, float addrspace(3)* %lds
;  %tmp1 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 0)
;  %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
;  store float 0.000000e+00, float addrspace(3)* %tmp2
;  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
;  ret void
;}

; Test that v_interp_p1 uses different source and destination registers
; on 16 bank LDS chips.
;
; The negative check below rejects any v_interp_p1_f32 whose destination VGPR
; is also its source VGPR. It uses the 16BANK prefix, so it is only active in
; the kabini and stoney runs. The function performs three p1/p2 pairs (second
; operand 0, 1 and 2 on attribute 0) fed from %arg19 with %arg17 as the
; trailing operand, then packs the absolute values and exports them.

; GCN-LABEL: {{^}}v_interp_p1_bank16_bug:
; 16BANK-NOT: v_interp_p1_f32{{(_e32)*}} [[DST:v[0-9]+]], [[DST]]
define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg13, [17 x <4 x i32>] addrspace(2)* byval %arg14, [34 x <8 x i32>] addrspace(2)* byval %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) #0 {
main_body:
  %i.i = extractelement <2 x i32> %arg19, i32 0
  %j.i = extractelement <2 x i32> %arg19, i32 1
  %i.f.i = bitcast i32 %i.i to float
  %j.f.i = bitcast i32 %j.i to float
  %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg17) #0
  %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg17) #0
  %i.i7 = extractelement <2 x i32> %arg19, i32 0
  %j.i8 = extractelement <2 x i32> %arg19, i32 1
  %i.f.i9 = bitcast i32 %i.i7 to float
  %j.f.i10 = bitcast i32 %j.i8 to float
  %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 0, i32 %arg17) #0
  %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 0, i32 %arg17) #0
  %i.i1 = extractelement <2 x i32> %arg19, i32 0
  %j.i2 = extractelement <2 x i32> %arg19, i32 1
  %i.f.i3 = bitcast i32 %i.i1 to float
  %j.f.i4 = bitcast i32 %j.i2 to float
  %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 0, i32 %arg17) #0
  %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 0, i32 %arg17) #0
  %tmp = call float @llvm.fabs.f32(float %p2.i)
  %tmp34 = call float @llvm.fabs.f32(float %p2.i12)
  %tmp35 = call float @llvm.fabs.f32(float %p2.i6)
  %tmp36 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp, float %tmp34)
  %tmp38 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp35, float 1.000000e+00)
  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp36, <2 x half> %tmp38, i1 true, i1 true) #0
  ret void
}

; Intrinsic declarations. The value-producing intrinsics (fabs, interp.p1,
; interp.p2, interp.mov, cvt.pkrtz) carry attribute group #1; the two export
; intrinsics carry attribute group #0.
declare float @llvm.fabs.f32(float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1

; #0 is used by the test functions and the export intrinsics; #1 adds readnone.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }