1 /* 2 * Copyright © 2022 Valve Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 #include "helpers.h" 7 8 using namespace aco; 9 10 BEGIN_TEST(insert_waitcnt.ds_ordered_count) 11 if (!setup_cs(NULL, GFX10_3)) 12 return; 13 14 Operand def0(PhysReg(256), v1); 15 Operand def1(PhysReg(257), v1); 16 Operand def2(PhysReg(258), v1); 17 Operand gds_base(PhysReg(259), v1); 18 Operand chan_counter(PhysReg(260), v1); 19 Operand m(m0, s1); 20 21 Instruction* ds_instr; 22 //>> ds_ordered_count %0:v[0], %0:v[3], %0:m0 offset0:3072 gds storage:gds semantics:volatile 23 //! s_waitcnt lgkmcnt(0) 24 ds_instr = bld.ds(aco_opcode::ds_ordered_count, def0, gds_base, m, 3072u, 0u, true); 25 ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile); 26 27 //! ds_add_rtn_u32 %0:v[1], %0:v[3], %0:v[4], %0:m0 gds storage:gds semantics:volatile,atomic,rmw 28 ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, def1, gds_base, chan_counter, m, 0u, 0u, true); 29 ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw); 30 31 //! s_waitcnt lgkmcnt(0) 32 //! ds_ordered_count %0:v[2], %0:v[3], %0:m0 offset0:3840 gds storage:gds semantics:volatile 33 ds_instr = bld.ds(aco_opcode::ds_ordered_count, def2, gds_base, m, 3840u, 0u, true); 34 ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile); 35 36 finish_waitcnt_test(); 37 END_TEST 38 39 BEGIN_TEST(insert_waitcnt.clause) 40 if (!setup_cs(NULL, GFX11)) 41 return; 42 43 Definition def_v4(PhysReg(260), v1); 44 Definition def_v5(PhysReg(261), v1); 45 Definition def_v6(PhysReg(262), v1); 46 Definition def_v7(PhysReg(263), v1); 47 Operand op_v0(PhysReg(256), v1); 48 Operand op_v4(PhysReg(260), v1); 49 Operand op_v5(PhysReg(261), v1); 50 Operand op_v6(PhysReg(262), v1); 51 Operand op_v7(PhysReg(263), v1); 52 Operand desc0(PhysReg(0), s4); 53 54 //>> p_unit_test 0 55 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 56 57 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 58 //! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[0], 0 59 //! v1: %0:v[6] = buffer_load_dword %0:s[0-3], %0:v[0], 0 60 //! v1: %0:v[7] = buffer_load_dword %0:s[0-3], %0:v[0], 0 61 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 62 bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc0, op_v0, Operand::zero(), 0, false); 63 bld.mubuf(aco_opcode::buffer_load_dword, def_v6, desc0, op_v0, Operand::zero(), 0, false); 64 bld.mubuf(aco_opcode::buffer_load_dword, def_v7, desc0, op_v0, Operand::zero(), 0, false); 65 //! s_waitcnt vmcnt(0) 66 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[4], 0 67 //! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[5], 0 68 //! v1: %0:v[6] = buffer_load_dword %0:s[0-3], %0:v[6], 0 69 //! v1: %0:v[7] = buffer_load_dword %0:s[0-3], %0:v[7], 0 70 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v4, Operand::zero(), 0, false); 71 bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc0, op_v5, Operand::zero(), 0, false); 72 bld.mubuf(aco_opcode::buffer_load_dword, def_v6, desc0, op_v6, Operand::zero(), 0, false); 73 bld.mubuf(aco_opcode::buffer_load_dword, def_v7, desc0, op_v7, Operand::zero(), 0, false); 74 //! s_waitcnt vmcnt(0) 75 //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[4] 76 //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[5] 77 //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[6] 78 //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[7] 79 bld.mubuf(aco_opcode::buffer_store_dword, desc0, op_v0, Operand::zero(), op_v4, 0, false); 80 bld.mubuf(aco_opcode::buffer_store_dword, desc0, op_v0, Operand::zero(), op_v5, 0, false); 81 bld.mubuf(aco_opcode::buffer_store_dword, desc0, op_v0, Operand::zero(), op_v6, 0, false); 82 bld.mubuf(aco_opcode::buffer_store_dword, desc0, op_v0, Operand::zero(), op_v7, 0, false); 83 84 //>> p_unit_test 1 85 bld.reset(program->create_and_insert_block()); 86 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); 87 88 //! s4: %0:s[4-7] = s_load_dwordx4 %0:s[0-1], 0 89 bld.smem(aco_opcode::s_load_dwordx4, Definition(PhysReg(4), s4), Operand(PhysReg(0), s2), 90 Operand::zero()); 91 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 92 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 93 //! s_waitcnt lgkmcnt(0) vmcnt(0) 94 //! v1: %0:v[5] = buffer_load_dword %0:s[4-7], %0:v[4], 0 95 bld.mubuf(aco_opcode::buffer_load_dword, def_v5, Operand(PhysReg(4), s4), op_v4, Operand::zero(), 96 0, false); 97 98 //>> p_unit_test 2 99 bld.reset(program->create_and_insert_block()); 100 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); 101 102 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 103 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 104 //! v_nop 105 bld.vop1(aco_opcode::v_nop); 106 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 107 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 108 //! s_waitcnt vmcnt(0) 109 //! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[4], 0 110 bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc0, op_v4, Operand::zero(), 0, false); 111 112 finish_waitcnt_test(); 113 END_TEST 114 115 BEGIN_TEST(insert_waitcnt.waw.mixed_vmem_lds.vmem) 116 if (!setup_cs(NULL, GFX10)) 117 return; 118 119 Definition def_v4(PhysReg(260), v1); 120 Operand op_v0(PhysReg(256), v1); 121 Operand desc0(PhysReg(0), s4); 122 123 //>> BB0 124 //! /* logical preds: / linear preds: / kind: top-level, */ 125 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 126 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 127 128 //>> BB1 129 //! /* logical preds: / linear preds: / kind: */ 130 //! v1: %0:v[4] = ds_read_b32 %0:v[0] 131 bld.reset(program->create_and_insert_block()); 132 bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); 133 134 bld.reset(program->create_and_insert_block()); 135 program->blocks[2].linear_preds.push_back(0); 136 program->blocks[2].linear_preds.push_back(1); 137 program->blocks[2].logical_preds.push_back(0); 138 program->blocks[2].logical_preds.push_back(1); 139 140 //>> BB2 141 //! /* logical preds: BB0, BB1, / linear preds: BB0, BB1, / kind: uniform, */ 142 //! s_waitcnt lgkmcnt(0) 143 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 144 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 145 146 finish_waitcnt_test(); 147 END_TEST 148 149 BEGIN_TEST(insert_waitcnt.waw.mixed_vmem_lds.lds) 150 if (!setup_cs(NULL, GFX10)) 151 return; 152 153 Definition def_v4(PhysReg(260), v1); 154 Operand op_v0(PhysReg(256), v1); 155 Operand desc0(PhysReg(0), s4); 156 157 //>> BB0 158 //! /* logical preds: / linear preds: / kind: top-level, */ 159 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 160 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 161 162 //>> BB1 163 //! /* logical preds: / linear preds: / kind: */ 164 //! v1: %0:v[4] = ds_read_b32 %0:v[0] 165 bld.reset(program->create_and_insert_block()); 166 bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); 167 168 bld.reset(program->create_and_insert_block()); 169 program->blocks[2].linear_preds.push_back(0); 170 program->blocks[2].linear_preds.push_back(1); 171 program->blocks[2].logical_preds.push_back(0); 172 program->blocks[2].logical_preds.push_back(1); 173 174 //>> BB2 175 //! /* logical preds: BB0, BB1, / linear preds: BB0, BB1, / kind: uniform, */ 176 //! s_waitcnt vmcnt(0) 177 //! v1: %0:v[4] = ds_read_b32 %0:v[0] 178 bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); 179 180 finish_waitcnt_test(); 181 END_TEST 182 183 BEGIN_TEST(insert_waitcnt.waw.vmem_types) 184 for (amd_gfx_level gfx : {GFX11, GFX12}) { 185 if (!setup_cs(NULL, gfx)) 186 continue; 187 188 Definition def_v4(PhysReg(260), v1); 189 Operand op_v0(PhysReg(256), v1); 190 Operand desc_s4(PhysReg(0), s4); 191 Operand desc_s8(PhysReg(8), s8); 192 193 //>> p_unit_test 0 194 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 195 //~gfx12! s_wait_loadcnt imm:0 196 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 197 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0)); 198 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 199 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 200 201 //>> p_unit_test 1 202 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 203 //~gfx11! s_waitcnt vmcnt(0) 204 //~gfx12! s_wait_loadcnt imm:0 205 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 206 bld.reset(program->create_and_insert_block()); 207 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); 208 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 209 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 210 211 //>> p_unit_test 2 212 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 213 //~gfx11! s_waitcnt vmcnt(0) 214 //~gfx12! s_wait_loadcnt imm:0 215 //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d 216 bld.reset(program->create_and_insert_block()); 217 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); 218 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 219 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), 220 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); 221 222 //>> p_unit_test 3 223 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 224 //~gfx12! s_wait_samplecnt imm:0 225 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 226 bld.reset(program->create_and_insert_block()); 227 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); 228 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 229 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 230 231 //>> p_unit_test 4 232 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 233 //~gfx11! s_waitcnt vmcnt(0) 234 //~gfx12! s_wait_samplecnt imm:0 235 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 236 bld.reset(program->create_and_insert_block()); 237 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); 238 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 239 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 240 241 //>> p_unit_test 5 242 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 243 //~gfx11! s_waitcnt vmcnt(0) 244 //~gfx12! s_wait_samplecnt imm:0 245 //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d 246 bld.reset(program->create_and_insert_block()); 247 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); 248 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 249 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), 250 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); 251 252 //>> p_unit_test 6 253 //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d 254 //~gfx12! s_wait_bvhcnt imm:0 255 //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d 256 bld.reset(program->create_and_insert_block()); 257 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6)); 258 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), 259 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); 260 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), 261 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); 262 263 //>> p_unit_test 7 264 //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d 265 //~gfx11! s_waitcnt vmcnt(0) 266 //~gfx12! s_wait_bvhcnt imm:0 267 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 268 bld.reset(program->create_and_insert_block()); 269 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7)); 270 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), 271 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); 272 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 273 274 //>> p_unit_test 8 275 //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d 276 //~gfx11! s_waitcnt vmcnt(0) 277 //~gfx12! s_wait_bvhcnt imm:0 278 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 279 bld.reset(program->create_and_insert_block()); 280 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8)); 281 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), 282 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); 283 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 284 285 //>> BB9 286 //! /* logical preds: / linear preds: / kind: */ 287 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 288 bld.reset(program->create_and_insert_block()); 289 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 290 291 //>> BB10 292 //! /* logical preds: / linear preds: / kind: */ 293 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 294 bld.reset(program->create_and_insert_block()); 295 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 296 297 bld.reset(program->create_and_insert_block()); 298 program->blocks[11].linear_preds.push_back(9); 299 program->blocks[11].linear_preds.push_back(10); 300 program->blocks[11].logical_preds.push_back(9); 301 program->blocks[11].logical_preds.push_back(10); 302 303 //>> BB11 304 //! /* logical preds: BB9, BB10, / linear preds: BB9, BB10, / kind: uniform, */ 305 //! p_unit_test 9 306 //~gfx12! s_wait_loadcnt imm:0 307 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 308 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9)); 309 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 310 311 //>> BB12 312 //! /* logical preds: / linear preds: / kind: */ 313 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 314 bld.reset(program->create_and_insert_block()); 315 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 316 317 //>> BB13 318 //! /* logical preds: / linear preds: / kind: */ 319 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 320 bld.reset(program->create_and_insert_block()); 321 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 322 323 bld.reset(program->create_and_insert_block()); 324 program->blocks[14].linear_preds.push_back(12); 325 program->blocks[14].linear_preds.push_back(13); 326 program->blocks[14].logical_preds.push_back(12); 327 program->blocks[14].logical_preds.push_back(13); 328 329 //>> BB14 330 //! /* logical preds: BB12, BB13, / linear preds: BB12, BB13, / kind: uniform, */ 331 //! p_unit_test 10 332 //~gfx11! s_waitcnt vmcnt(0) 333 //~gfx12! s_wait_loadcnt imm:0 334 //~gfx12! s_wait_samplecnt imm:0 335 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 336 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10)); 337 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 338 339 finish_waitcnt_test(); 340 } 341 END_TEST 342 343 BEGIN_TEST(insert_waitcnt.vmem) 344 if (!setup_cs(NULL, GFX12)) 345 return; 346 347 Definition def_v4(PhysReg(260), v1); 348 Definition def_v5(PhysReg(261), v1); 349 Definition def_v6(PhysReg(262), v1); 350 Definition def_v7(PhysReg(263), v1); 351 Definition def_v8(PhysReg(264), v1); 352 Definition def_v9(PhysReg(265), v1); 353 Operand op_v0(PhysReg(256), v1); 354 Operand op_v4(PhysReg(260), v1); 355 Operand op_v5(PhysReg(261), v1); 356 Operand op_v6(PhysReg(262), v1); 357 Operand op_v7(PhysReg(263), v1); 358 Operand op_v8(PhysReg(264), v1); 359 Operand op_v9(PhysReg(265), v1); 360 Operand desc_s4(PhysReg(0), s4); 361 Operand desc_s8(PhysReg(8), s8); 362 363 //>> v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 364 //! v1: %0:v[5] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 365 //! v1: %0:v[6] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d unrm r128 366 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 367 bld.mimg(aco_opcode::image_sample, def_v5, desc_s8, desc_s4, Operand(v1), op_v0); 368 Instruction* instr = 369 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v6, desc_s4, Operand(s4), Operand(v1), 370 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))) 371 .instr; 372 instr->mimg().unrm = true; 373 instr->mimg().r128 = true; 374 375 //! v1: %0:v[7] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d 376 //! v1: %0:v[8] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 377 //! v1: %0:v[9] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d unrm r128 378 bld.mimg(aco_opcode::image_load, def_v7, desc_s8, Operand(s4), Operand(v1), op_v0, 0x1); 379 bld.mimg(aco_opcode::image_sample, def_v8, desc_s8, desc_s4, Operand(v1), op_v0); 380 instr = bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v9, desc_s4, Operand(s4), 381 Operand(v1), Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))) 382 .instr; 383 instr->mimg().unrm = true; 384 instr->mimg().r128 = true; 385 386 //! s_wait_loadcnt imm:1 387 //! p_unit_test 0, %0:v[4] 388 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0), op_v4); 389 //! s_wait_samplecnt imm:1 390 //! p_unit_test 1, %0:v[5] 391 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1), op_v5); 392 //! s_wait_bvhcnt imm:1 393 //! p_unit_test 2, %0:v[6] 394 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2), op_v6); 395 //! s_wait_loadcnt imm:0 396 //! p_unit_test 3, %0:v[7] 397 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3), op_v7); 398 //! s_wait_samplecnt imm:0 399 //! p_unit_test 4, %0:v[8] 400 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4), op_v8); 401 //! s_wait_bvhcnt imm:0 402 //! p_unit_test 5, %0:v[9] 403 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5), op_v9); 404 405 /* Despite not using a sampler, this uses samplecnt. */ 406 //! v1: %0:v[5] = image_msaa_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d 407 //! s_wait_samplecnt imm:0 408 //! p_unit_test 6, %0:v[5] 409 bld.mimg(aco_opcode::image_msaa_load, def_v5, desc_s8, Operand(s4), Operand(v1), op_v0); 410 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6), op_v5); 411 412 finish_waitcnt_test(); 413 END_TEST 414 415 BEGIN_TEST(insert_waitcnt.lds_smem) 416 for (amd_gfx_level gfx : {GFX11, GFX12}) { 417 if (!setup_cs(NULL, gfx)) 418 continue; 419 420 Definition def_v4(PhysReg(260), v1); 421 Definition def_v5(PhysReg(261), v1); 422 Definition def_s4(PhysReg(4), s1); 423 Definition def_s5(PhysReg(5), s1); 424 Operand op_s0(PhysReg(0), s1); 425 Operand op_s4(PhysReg(4), s1); 426 Operand op_s5(PhysReg(5), s1); 427 Operand op_v0(PhysReg(256), v1); 428 Operand op_v4(PhysReg(260), v1); 429 Operand op_v5(PhysReg(261), v1); 430 Operand desc_s4(PhysReg(0), s4); 431 432 //>> v1: %0:v[4] = ds_read_b32 %0:v[0] 433 //! s1: %0:s[4] = s_buffer_load_dword %0:s[0-3], %0:s[0] 434 //! v1: %0:v[5] = ds_read_b32 %0:v[0] 435 //! s1: %0:s[5] = s_buffer_load_dword %0:s[0-3], %0:s[0] 436 bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); 437 bld.smem(aco_opcode::s_buffer_load_dword, def_s4, desc_s4, op_s0); 438 bld.ds(aco_opcode::ds_read_b32, def_v5, op_v0); 439 bld.smem(aco_opcode::s_buffer_load_dword, def_s5, desc_s4, op_s0); 440 441 //~gfx11! s_waitcnt lgkmcnt(1) 442 //~gfx12! s_wait_dscnt imm:1 443 //! p_unit_test 0, %0:v[4] 444 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0), op_v4); 445 //~gfx11! s_waitcnt lgkmcnt(0) 446 //~gfx12! s_wait_kmcnt imm:0 447 //! p_unit_test 1, %0:s[4] 448 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1), op_s4); 449 //~gfx12! s_wait_dscnt imm:0 450 //! p_unit_test 2, %0:v[5] 451 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2), op_v5); 452 //! p_unit_test 3, %0:s[5] 453 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3), op_s5); 454 455 finish_waitcnt_test(); 456 } 457 END_TEST 458 459 BEGIN_TEST(insert_waitcnt.sendmsg_smem) 460 for (amd_gfx_level gfx : {GFX11, GFX12}) { 461 if (!setup_cs(NULL, gfx)) 462 continue; 463 464 Definition def_s4(PhysReg(4), s1); 465 Definition def_s5(PhysReg(5), s1); 466 Definition def_s6(PhysReg(6), s1); 467 Definition def_s7(PhysReg(7), s1); 468 Operand op_s0(PhysReg(0), s1); 469 Operand op_s4(PhysReg(4), s1); 470 Operand op_s5(PhysReg(5), s1); 471 Operand op_s6(PhysReg(6), s1); 472 Operand op_s7(PhysReg(7), s1); 473 Operand desc_s4(PhysReg(0), s4); 474 475 //>> s1: %0:s[4] = s_sendmsg_rtn_b32 3 sendmsg(rtn_get_realtime) 476 //! s1: %0:s[5] = s_buffer_load_dword %0:s[0-3], %0:s[0] 477 //! s1: %0:s[6] = s_sendmsg_rtn_b32 3 sendmsg(rtn_get_realtime) 478 //! s1: %0:s[7] = s_buffer_load_dword %0:s[0-3], %0:s[0] 479 bld.sop1(aco_opcode::s_sendmsg_rtn_b32, def_s4, Operand::c32(sendmsg_rtn_get_realtime)); 480 bld.smem(aco_opcode::s_buffer_load_dword, def_s5, desc_s4, op_s0); 481 bld.sop1(aco_opcode::s_sendmsg_rtn_b32, def_s6, Operand::c32(sendmsg_rtn_get_realtime)); 482 bld.smem(aco_opcode::s_buffer_load_dword, def_s7, desc_s4, op_s0); 483 484 //~gfx12! s_wait_kmcnt imm:1 485 //~gfx11! s_waitcnt lgkmcnt(1) 486 //! p_unit_test 0, %0:s[4] 487 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0), op_s4); 488 //~gfx12! s_wait_kmcnt imm:0 489 //~gfx11! s_waitcnt lgkmcnt(0) 490 //! p_unit_test 1, %0:s[5] 491 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1), op_s5); 492 //! p_unit_test 2, %0:s[6] 493 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2), op_s6); 494 //! p_unit_test 3, %0:s[7] 495 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3), op_s7); 496 497 finish_waitcnt_test(); 498 } 499 END_TEST 500 501 BEGIN_TEST(insert_waitcnt.vmem_ds) 502 if (!setup_cs(NULL, GFX12)) 503 return; 504 505 Definition def_v4(PhysReg(260), v1); 506 Definition def_v5(PhysReg(261), v1); 507 Operand op_v0(PhysReg(256), v1); 508 Operand op_v1(PhysReg(257), v1); 509 Operand op_v4(PhysReg(260), v1); 510 Operand op_v5(PhysReg(261), v1); 511 Operand desc_s4(PhysReg(0), s4); 512 513 program->workgroup_size = 128; 514 program->wgp_mode = true; 515 516 //>> v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 517 //! v1: %0:v[5] = ds_read_b32 %0:v[0] 518 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 519 bld.ds(aco_opcode::ds_read_b32, def_v5, op_v0); 520 521 //! s_wait_loadcnt_dscnt dscnt(0) loadcnt(0) 522 //! p_unit_test 0, %0:v[4], %0:v[5] 523 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0), op_v4, op_v5); 524 525 //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[1] storage:buffer 526 //! v1: %0:v[5] = ds_write_b32 %0:v[0], %0:v[1] storage:shared 527 Instruction* instr = 528 bld.mubuf(aco_opcode::buffer_store_dword, desc_s4, op_v0, Operand::zero(), op_v1, 0, false) 529 .instr; 530 instr->mubuf().sync = memory_sync_info(storage_buffer); 531 instr = bld.ds(aco_opcode::ds_write_b32, def_v5, op_v0, op_v1).instr; 532 instr->ds().sync = memory_sync_info(storage_shared); 533 534 //! s_wait_storecnt_dscnt dscnt(0) storecnt(0) 535 bld.barrier(aco_opcode::p_barrier, 536 memory_sync_info(storage_buffer | storage_shared, semantic_acqrel, scope_workgroup)); 537 538 finish_waitcnt_test(); 539 END_TEST 540