/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_instr_mem.h"

#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"
#include "sfn_alu_defines.h"
#include "sfn_instr_alu.h"
#include "sfn_instr_fetch.h"
#include "sfn_instr_tex.h"
#include "sfn_shader.h"
#include "sfn_virtualvalues.h"

namespace r600 {

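/* GDSInstr models an operation on the global data share (GDS); it is used
 * here to implement atomic counters. m_dest may be null when the result of
 * the atomic operation is never read. */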
GDSInstr::GDSInstr(
   ESDOp op, Register *dest, const RegisterVec4& src, int uav_base, PRegister uav_id):
    Resource(this, uav_base, uav_id),
    m_op(op),
    m_dest(dest),
    m_src(src)
{
   set_always_keep();

   m_src.add_use(this);
   if (m_dest)
      m_dest->add_parent(this);
}

bool
GDSInstr::is_equal_to(const GDSInstr& rhs) const
{
#define NE(X) (X != rhs.X)

   if (NE(m_op) || NE(m_src))
      return false;

   if (!sfn_value_equal(m_dest, rhs.m_dest))
      return false;

   return resource_is_equal(rhs);
}

void
GDSInstr::accept(ConstInstrVisitor& visitor) const
{
   visitor.visit(*this);
}

void
GDSInstr::accept(InstrVisitor& visitor)
{
   visitor.visit(this);
}

bool
GDSInstr::do_ready() const
{
   return m_src.ready(block_id(), index()) && resource_ready(block_id(), index());
}

void
GDSInstr::do_print(std::ostream& os) const
{
   os << "GDS " << lds_ops.at(m_op).name;
   if (m_dest)
      os << *m_dest;
   else
      os << "___";
   os << " " << m_src;
   os << " BASE:" << resource_id();

   print_resource_offset(os);
}

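/* Dispatch the atomic counter intrinsics to the matching emit helper.
 * Returns false for intrinsics that are not atomic counter operations. */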
bool
GDSInstr::emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_comp_swap:
      return emit_atomic_op2(intr, shader);
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
      return emit_atomic_read(intr, shader);
   case nir_intrinsic_atomic_counter_inc:
      return emit_atomic_inc(intr, shader);
   case nir_intrinsic_atomic_counter_pre_dec:
      return emit_atomic_pre_dec(intr, shader);
   default:
      return false;
   }
}

uint8_t GDSInstr::allowed_src_chan_mask() const
{
   return m_src.free_chan_mask();
}

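/* Map NIR atomic counter intrinsics to GDS opcodes. The _RET variants
 * return the previous counter value; get_opcode_wo() selects write-only
 * variants for intrinsics whose result is never read. Exchange and
 * compare-exchange are only available as returning operations. */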
static ESDOp
get_opcode(const nir_intrinsic_op opcode)
{
   switch (opcode) {
   case nir_intrinsic_atomic_counter_add:
      return DS_OP_ADD_RET;
   case nir_intrinsic_atomic_counter_and:
      return DS_OP_AND_RET;
   case nir_intrinsic_atomic_counter_exchange:
      return DS_OP_XCHG_RET;
   case nir_intrinsic_atomic_counter_inc:
      return DS_OP_INC_RET;
   case nir_intrinsic_atomic_counter_max:
      return DS_OP_MAX_UINT_RET;
   case nir_intrinsic_atomic_counter_min:
      return DS_OP_MIN_UINT_RET;
   case nir_intrinsic_atomic_counter_or:
      return DS_OP_OR_RET;
   case nir_intrinsic_atomic_counter_read:
      return DS_OP_READ_RET;
   case nir_intrinsic_atomic_counter_xor:
      return DS_OP_XOR_RET;
   case nir_intrinsic_atomic_counter_post_dec:
      return DS_OP_DEC_RET;
   case nir_intrinsic_atomic_counter_comp_swap:
      return DS_OP_CMP_XCHG_RET;
   case nir_intrinsic_atomic_counter_pre_dec:
   default:
      return DS_OP_INVALID;
   }
}

static ESDOp
get_opcode_wo(const nir_intrinsic_op opcode)
{
   switch (opcode) {
   case nir_intrinsic_atomic_counter_add:
      return DS_OP_ADD;
   case nir_intrinsic_atomic_counter_and:
      return DS_OP_AND;
   case nir_intrinsic_atomic_counter_inc:
      return DS_OP_INC;
   case nir_intrinsic_atomic_counter_max:
      return DS_OP_MAX_UINT;
   case nir_intrinsic_atomic_counter_min:
      return DS_OP_MIN_UINT;
   case nir_intrinsic_atomic_counter_or:
      return DS_OP_OR;
   case nir_intrinsic_atomic_counter_xor:
      return DS_OP_XOR;
   case nir_intrinsic_atomic_counter_post_dec:
      return DS_OP_DEC;
   case nir_intrinsic_atomic_counter_comp_swap:
      return DS_OP_CMP_XCHG_RET;
   case nir_intrinsic_atomic_counter_exchange:
      return DS_OP_XCHG_RET;
   case nir_intrinsic_atomic_counter_pre_dec:
   default:
      return DS_OP_INVALID;
   }
}

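/* Emit a two-source atomic counter operation. On pre-Cayman hardware the
 * counter address is encoded in the instruction itself; on Cayman it is
 * computed into the first source channel as 4 * (uav_id + base). */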
bool
GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   bool read_result = !list_is_empty(&instr->def.uses);

   ESDOp op =
      read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic);

   if (DS_OP_INVALID == op)
      return false;

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += nir_intrinsic_base(instr);

   auto dest = read_result ? vf.dest(instr->def, 0, pin_free) : nullptr;

   PRegister src_as_register = nullptr;
   auto src_val = vf.src(instr->src[1], 0);
   if (!src_val->as_register()) {
      auto temp_src_val = vf.temp_register();
      shader.emit_instruction(
         new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write));
      src_as_register = temp_src_val;
   } else
      src_as_register = src_val->as_register();

   if (uav_id != nullptr)
      shader.set_flag(Shader::sh_indirect_atomic);

   GDSInstr *ir = nullptr;
   if (shader.chip_class() < ISA_CC_CAYMAN) {
      RegisterVec4 src(nullptr, src_as_register, nullptr, nullptr, pin_free);
      ir = new GDSInstr(op, dest, src, offset, uav_id);

   } else {
      auto dest = vf.dest(instr->def, 0, pin_free);
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24,
                                              tmp[0],
                                              uav_id,
                                              vf.literal(4),
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(
            new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
      shader.emit_instruction(
         new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write));
      ir = new GDSInstr(op, dest, tmp, 0, nullptr);
   }
   shader.emit_instruction(ir);
   return true;
}

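/* Emit an atomic counter read. The counter value is fetched with
 * DS_OP_READ_RET; no source data is needed on pre-Cayman hardware. */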
bool
GDSInstr::emit_atomic_read(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   auto dest = vf.dest(instr->def, 0, pin_free);

   GDSInstr *ir = nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      RegisterVec4 src = RegisterVec4(0, true, {7, 7, 7, 7});
      ir = new GDSInstr(DS_OP_READ_RET, dest, src, offset, uav_id);
   } else {
      auto tmp = vf.temp_vec4(pin_group, {0, 7, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24,
                                              tmp[0],
                                              uav_id,
                                              vf.literal(4),
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(
            new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      ir = new GDSInstr(DS_OP_READ_RET, dest, tmp, 0, nullptr);
   }

   shader.emit_instruction(ir);
   return true;
}

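/* Emit an atomic counter increment. The per-lane increment value is
 * provided by shader.atomic_update(); the returning or write-only add is
 * selected depending on whether the result is read. */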
bool
GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   bool read_result = !list_is_empty(&instr->def.uses);

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   GDSInstr *ir = nullptr;
   auto dest = read_result ? vf.dest(instr->def, 0, pin_free) : nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
      ir =
         new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest, src, offset, uav_id);
   } else {
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});

      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24,
                                              tmp[0],
                                              uav_id,
                                              vf.literal(4),
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(
            new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      shader.emit_instruction(
         new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
      ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest, tmp, 0, nullptr);
   }
   shader.emit_instruction(ir);
   return true;
}

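/* Emit an atomic counter pre-decrement. GDS returns the value before the
 * operation, so the decrement is emitted as a subtraction and, if the
 * result is used, one is subtracted from the returned value to obtain
 * pre-decrement semantics. */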
bool
GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   bool read_result = !list_is_empty(&instr->def.uses);

   auto opcode = read_result ? DS_OP_SUB_RET : DS_OP_SUB;

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   auto *tmp_dest = read_result ? vf.temp_register() : nullptr;

   GDSInstr *ir = nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
      ir = new GDSInstr(opcode, tmp_dest, src, offset, uav_id);
   } else {
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24,
                                              tmp[0],
                                              uav_id,
                                              vf.literal(4),
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(
            new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      shader.emit_instruction(
         new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
      ir = new GDSInstr(opcode, tmp_dest, tmp, 0, nullptr);
   }

   shader.emit_instruction(ir);
   if (read_result)
      shader.emit_instruction(new AluInstr(op2_sub_int,
                                           vf.dest(instr->def, 0, pin_free),
                                           tmp_dest,
                                           vf.one_i(),
                                           AluInstr::last_write));
   return true;
}

void GDSInstr::update_indirect_addr(PRegister old_reg, PRegister addr)
{
   (void)old_reg;
   set_resource_offset(addr);
}

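/* RatInstr models an access to a RAT (random access target), the resource
 * type used for SSBO, global and image memory operations. */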
RatInstr::RatInstr(ECFOpCode cf_opcode,
                   ERatOp rat_op,
                   const RegisterVec4& data,
                   const RegisterVec4& index,
                   int rat_id,
                   PRegister rat_id_offset,
                   int burst_count,
                   int comp_mask,
                   int element_size):
    Resource(this, rat_id, rat_id_offset),
    m_cf_opcode(cf_opcode),
    m_rat_op(rat_op),
    m_data(data),
    m_index(index),
    m_burst_count(burst_count),
    m_comp_mask(comp_mask),
    m_element_size(element_size)
{
   set_always_keep();
   m_data.add_use(this);
   m_index.add_use(this);
}

void
RatInstr::accept(ConstInstrVisitor& visitor) const
{
   visitor.visit(*this);
}

void
RatInstr::accept(InstrVisitor& visitor)
{
   visitor.visit(this);
}

bool
RatInstr::is_equal_to(const RatInstr& lhs) const
{
   (void)lhs;
   assert(0);
   return false;
}

bool
RatInstr::do_ready() const
{
   if (m_rat_op != STORE_TYPED) {
      for (auto i : required_instr()) {
         if (!i->is_scheduled()) {
            return false;
         }
      }
   }

   return m_data.ready(block_id(), index()) && m_index.ready(block_id(), index());
}

void
RatInstr::do_print(std::ostream& os) const
{
   os << "MEM_RAT RAT " << resource_id();
   print_resource_offset(os);
   os << " @" << m_index;
   os << " OP:" << m_rat_op << " " << m_data;
   os << " BC:" << m_burst_count << " MASK:" << m_comp_mask << " ES:" << m_element_size;
   if (m_need_ack)
      os << " ACK";
}

void RatInstr::update_indirect_addr(UNUSED PRegister old_reg, PRegister addr)
{
   set_resource_offset(addr);
}

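/* Map NIR atomic ops to RAT opcodes. The _RTN variants return the previous
 * value; get_rat_opcode_wo() selects write-only variants for atomics whose
 * result is unused (exchange exists only as a returning operation). */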
static RatInstr::ERatOp
get_rat_opcode(const nir_atomic_op opcode)
{
   switch (opcode) {
   case nir_atomic_op_iadd:
      return RatInstr::ADD_RTN;
   case nir_atomic_op_iand:
      return RatInstr::AND_RTN;
   case nir_atomic_op_ior:
      return RatInstr::OR_RTN;
   case nir_atomic_op_imin:
      return RatInstr::MIN_INT_RTN;
   case nir_atomic_op_imax:
      return RatInstr::MAX_INT_RTN;
   case nir_atomic_op_umin:
      return RatInstr::MIN_UINT_RTN;
   case nir_atomic_op_umax:
      return RatInstr::MAX_UINT_RTN;
   case nir_atomic_op_ixor:
      return RatInstr::XOR_RTN;
   case nir_atomic_op_cmpxchg:
      return RatInstr::CMPXCHG_INT_RTN;
   case nir_atomic_op_xchg:
      return RatInstr::XCHG_RTN;
   default:
      unreachable("Unsupported atomic");
   }
}

static RatInstr::ERatOp
get_rat_opcode_wo(const nir_atomic_op opcode)
{
   switch (opcode) {
   case nir_atomic_op_iadd:
      return RatInstr::ADD;
   case nir_atomic_op_iand:
      return RatInstr::AND;
   case nir_atomic_op_ior:
      return RatInstr::OR;
   case nir_atomic_op_imin:
      return RatInstr::MIN_INT;
   case nir_atomic_op_imax:
      return RatInstr::MAX_INT;
   case nir_atomic_op_umin:
      return RatInstr::MIN_UINT;
   case nir_atomic_op_umax:
      return RatInstr::MAX_UINT;
   case nir_atomic_op_ixor:
      return RatInstr::XOR;
   case nir_atomic_op_cmpxchg:
      return RatInstr::CMPXCHG_INT;
   case nir_atomic_op_xchg:
      return RatInstr::XCHG_RTN;
   default:
      unreachable("Unsupported atomic");
   }
}

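/* Dispatch the memory intrinsics handled via RAT operations. Returns false
 * for intrinsics that are not handled here. */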
bool
RatInstr::emit(nir_intrinsic_instr *intr, Shader& shader)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_ssbo:
      return emit_ssbo_load(intr, shader);
   case nir_intrinsic_store_ssbo:
      return emit_ssbo_store(intr, shader);
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
      return emit_ssbo_atomic_op(intr, shader);
   case nir_intrinsic_store_global:
      return emit_global_store(intr, shader);
   case nir_intrinsic_image_store:
      return emit_image_store(intr, shader);
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
      return emit_image_load_or_atomic(intr, shader);
   case nir_intrinsic_image_size:
      return emit_image_size(intr, shader);
   case nir_intrinsic_image_samples:
      return emit_image_samples(intr, shader);
   case nir_intrinsic_get_ssbo_size:
      return emit_ssbo_size(intr, shader);
   default:
      return false;
   }
}

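/* Emit an SSBO load as a vertex-cache fetch from the buffer resource. The
 * fetch format is selected by the number of components that are read. */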
bool
RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto dest = vf.dest_vec4(intr->def, pin_group);

   /* src[0] (the buffer index) is evaluated below via
    * evaluate_resource_offset; src[1] is the byte offset */
   auto addr = vf.src(intr->src[1], 0);
   auto addr_temp = vf.temp_register();

   /* Convert the byte offset to a dword index; this should be lowered in NIR */
   shader.emit_instruction(new AluInstr(
      op2_lshr_int, addr_temp, addr, vf.literal(2), {alu_write, alu_last_instr}));

   const EVTXDataFormat formats[4] = {fmt_32, fmt_32_32, fmt_32_32_32, fmt_32_32_32_32};

   RegisterVec4::Swizzle dest_swz[4] = {
      {0, 7, 7, 7},
      {0, 1, 7, 7},
      {0, 1, 2, 7},
      {0, 1, 2, 3}
   };

   int comp_idx = intr->def.num_components - 1;

   auto [offset, res_offset] = shader.evaluate_resource_offset(intr, 0);

   auto res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + offset + shader.ssbo_image_offset();

   auto ir = new LoadFromBuffer(
      dest, dest_swz[comp_idx], addr_temp, 0, res_id, res_offset, formats[comp_idx]);
   ir->set_fetch_flag(FetchInstr::use_tc);
   ir->set_num_format(vtx_nf_int);

   shader.emit_instruction(ir);
   return true;
}

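/* Emit a store to global memory as a cacheless raw RAT store. Only the
 * components selected by the NIR write mask are copied into the value
 * vector. */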
bool
RatInstr::emit_global_store(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto addr_orig = vf.src(intr->src[1], 0);
   auto addr_vec = vf.temp_vec4(pin_chan, {0, 7, 7, 7});

   shader.emit_instruction(
      new AluInstr(op2_lshr_int, addr_vec[0], addr_orig, vf.literal(2),
                   AluInstr::last_write));

   RegisterVec4::Swizzle value_swz = {0, 7, 7, 7};
   auto mask = nir_intrinsic_write_mask(intr);
   for (int i = 0; i < 4; ++i) {
      if (mask & (1 << i))
         value_swz[i] = i;
   }

   auto value_vec = vf.temp_vec4(pin_chgr, value_swz);

   AluInstr *ir = nullptr;
   for (int i = 0; i < 4; ++i) {
      if (value_swz[i] < 4) {
         ir = new AluInstr(op1_mov, value_vec[i],
                           vf.src(intr->src[0], i), AluInstr::write);
         shader.emit_instruction(ir);
      }
   }
   if (ir)
      ir->set_alu_flag(alu_last_instr);

   auto store = new RatInstr(cf_mem_rat_cacheless,
                             RatInstr::STORE_RAW,
                             value_vec,
                             addr_vec,
                             shader.ssbo_image_offset(),
                             nullptr,
                             1,
                             mask,
                             0);
   shader.emit_instruction(store);
   return true;
}

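/* Emit an SSBO store. Each component is written with its own one-dword
 * typed RAT store, incrementing the dword address per component. */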
bool
RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto orig_addr = vf.src(instr->src[2], 0);

   auto addr_base = vf.temp_register();

   auto [offset, rat_id] = shader.evaluate_resource_offset(instr, 1);

   shader.emit_instruction(
      new AluInstr(op2_lshr_int, addr_base, orig_addr, vf.literal(2), AluInstr::write));

   for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) {
      auto addr_vec = vf.temp_vec4(pin_group, {0, 1, 2, 7});
      if (i == 0) {
         shader.emit_instruction(
            new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::last_write));
      } else {
         shader.emit_instruction(new AluInstr(
            op2_add_int, addr_vec[0], addr_base, vf.literal(i), AluInstr::last_write));
      }
      auto value = vf.src(instr->src[0], i);
      PRegister v = vf.temp_register(0);
      shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::last_write));
      auto value_vec = RegisterVec4(v, nullptr, nullptr, nullptr, pin_chan);
      auto store = new RatInstr(cf_mem_rat,
                                RatInstr::STORE_TYPED,
                                value_vec,
                                addr_vec,
                                offset + shader.ssbo_image_offset(),
                                rat_id,
                                1,
                                1,
                                0);
      shader.emit_instruction(store);
   }

   return true;
}

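/* Emit an SSBO atomic. The RAT return address is placed in the second data
 * channel; when the result is read, it is fetched back through the vertex
 * cache once the atomic operation has been acknowledged. */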
bool
RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intr, 0);

   bool read_result = !list_is_empty(&intr->def.uses);
   auto opcode = read_result ? get_rat_opcode(nir_intrinsic_atomic_op(intr))
                             : get_rat_opcode_wo(nir_intrinsic_atomic_op(intr));

   auto coord_orig = vf.src(intr->src[1], 0);
   auto coord = vf.temp_register(0);

   auto data_vec4 = vf.temp_vec4(pin_chgr, {0, 1, 2, 3});

   shader.emit_instruction(
      new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write));

   shader.emit_instruction(
      new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));

   if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap) {
      shader.emit_instruction(
         new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[3], 0), AluInstr::write));
      shader.emit_instruction(
         new AluInstr(op1_mov,
                      data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
                      vf.src(intr->src[2], 0),
                      {alu_last_instr, alu_write}));
   } else {
      shader.emit_instruction(new AluInstr(
         op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write));
   }

   RegisterVec4 out_vec(coord, coord, coord, coord, pin_chgr);

   auto atomic = new RatInstr(cf_mem_rat,
                              opcode,
                              data_vec4,
                              out_vec,
                              imageid + shader.ssbo_image_offset(),
                              image_offset,
                              1,
                              0xf,
                              0);
   shader.emit_instruction(atomic);

   atomic->set_ack();
   if (read_result) {
      atomic->set_instr_flag(ack_rat_return_write);
      auto dest = vf.dest_vec4(intr->def, pin_group);

      auto fetch = new FetchInstr(vc_fetch,
                                  dest,
                                  {0, 1, 2, 3},
                                  shader.rat_return_address(),
                                  0,
                                  no_index_offset,
                                  fmt_32,
                                  vtx_nf_int,
                                  vtx_es_none,
                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                                  image_offset);
      fetch->set_mfc(15);
      fetch->set_fetch_flag(FetchInstr::srf_mode);
      fetch->set_fetch_flag(FetchInstr::use_tc);
      fetch->set_fetch_flag(FetchInstr::vpm);
      fetch->set_fetch_flag(FetchInstr::wait_ack);
      fetch->add_required_instr(atomic);
      shader.chain_ssbo_read(fetch);
      shader.emit_instruction(fetch);
   }

   return true;
}

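/* Emit get_ssbo_size as a buffer size query. Only constant buffer indices
 * are supported here. */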
bool
RatInstr::emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto dest = vf.dest_vec4(intr->def, pin_group);

   auto const_offset = nir_src_as_const_value(intr->src[0]);
   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      assert(0 && "dynamic buffer offset not supported in buffer_size");

   shader.emit_instruction(new QueryBufferSizeInstr(dest, {0, 1, 2, 3}, res_id));
   return true;
}

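/* Emit an image store as a typed RAT store. For 1D array images the layer
 * coordinate is moved into the third channel, which is apparently the slot
 * the hardware expects for the layer index. */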
bool
RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);

   auto coord_load = vf.src_vec4(intrin->src[1], pin_chan);
   auto coord = vf.temp_vec4(pin_chgr);

   auto value_load = vf.src_vec4(intrin->src[3], pin_chan);
   auto value = vf.temp_vec4(pin_chgr);

   RegisterVec4::Swizzle swizzle = {0, 1, 2, 3};
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin))
      swizzle = {0, 2, 1, 3};

   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(
         new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags));
   }
   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags));
   }

   auto op = cf_mem_rat; // nir_intrinsic_access(intrin) & ACCESS_COHERENT ?
                         //    cf_mem_rat_cacheless : cf_mem_rat;
   auto store = new RatInstr(
      op, RatInstr::STORE_TYPED, value, coord, imageid, image_offset, 1, 0xf, 0);

   store->set_ack();
   if (nir_intrinsic_access(intrin) & ACCESS_INCLUDE_HELPERS)
      store->set_instr_flag(Instr::helper);

   shader.emit_instruction(store);
   return true;
}

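/* Emit an image load or image atomic. Plain loads use NOP_RTN so the texel
 * is returned through the RAT return path; atomics use the mapped RAT
 * opcode. The result is read back with a vertex-cache fetch that decodes
 * the image format. */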
bool
RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);

   bool read_result = !list_is_empty(&intrin->def.uses);
   bool image_load = (intrin->intrinsic == nir_intrinsic_image_load);
   auto opcode = image_load  ? RatInstr::NOP_RTN
               : read_result ? get_rat_opcode(nir_intrinsic_atomic_op(intrin))
                             : get_rat_opcode_wo(nir_intrinsic_atomic_op(intrin));

   auto coord_orig = vf.src_vec4(intrin->src[1], pin_chan);
   auto coord = vf.temp_vec4(pin_chgr);

   auto data_vec4 = vf.temp_vec4(pin_chgr, {0, 1, 2, 3});

   RegisterVec4::Swizzle swizzle = {0, 1, 2, 3};
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin))
      swizzle = {0, 2, 1, 3};

   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(
         new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags));
   }

   shader.emit_instruction(
      new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));

   if (intrin->intrinsic == nir_intrinsic_image_atomic_swap) {
      shader.emit_instruction(
         new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[4], 0), AluInstr::write));
      shader.emit_instruction(
         new AluInstr(op1_mov,
                      data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
                      vf.src(intrin->src[3], 0),
                      AluInstr::last_write));
   } else {
      shader.emit_instruction(
         new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[3], 0), AluInstr::write));
      shader.emit_instruction(
         new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write));
   }

   auto atomic =
      new RatInstr(cf_mem_rat, opcode, data_vec4, coord, imageid, image_offset, 1, 0xf, 0);
   shader.emit_instruction(atomic);

   atomic->set_ack();
   if (read_result) {
      atomic->set_instr_flag(ack_rat_return_write);
      auto dest = vf.dest_vec4(intrin->def, pin_group);

      pipe_format format = nir_intrinsic_format(intrin);
      unsigned fmt = fmt_32;
      unsigned num_format = 0;
      unsigned format_comp = 0;
      unsigned endian = 0;
      r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);

      auto fetch = new FetchInstr(vc_fetch,
                                  dest,
                                  {0, 1, 2, 3},
                                  shader.rat_return_address(),
                                  0,
                                  no_index_offset,
                                  (EVTXDataFormat)fmt,
                                  (EVFetchNumFormat)num_format,
                                  (EVFetchEndianSwap)endian,
                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                                  image_offset);
      fetch->set_mfc(3);
      fetch->set_fetch_flag(FetchInstr::srf_mode);
      fetch->set_fetch_flag(FetchInstr::use_tc);
      fetch->set_fetch_flag(FetchInstr::vpm);
      fetch->set_fetch_flag(FetchInstr::wait_ack);
      if (format_comp)
         fetch->set_fetch_flag(FetchInstr::format_comp_signed);

      shader.chain_ssbo_read(fetch);
      shader.emit_instruction(fetch);
   }

   return true;
}

#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)

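/* Emit image_size. Buffer images use a buffer size query; all other images
 * use a resinfo texture instruction. For cube map arrays the layer count is
 * not available via resinfo and is instead loaded from the driver's buffer
 * info constants; with an indirect image index this requires a binary
 * search over the constant buffer dwords. */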
bool
RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto src = RegisterVec4(0, true, {4, 4, 4, 4});

   assert(nir_src_as_uint(intrin->src[1]) == 0);

   auto const_offset = nir_src_as_const_value(intrin->src[0]);
   PRegister dyn_offset = nullptr;

   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + nir_intrinsic_range_base(intrin);
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));

   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
      auto dest = vf.dest_vec4(intrin->def, pin_group);
      shader.emit_instruction(new QueryBufferSizeInstr(dest, {0, 1, 2, 3}, res_id));
      return true;
   } else {

      if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
          nir_intrinsic_image_array(intrin) &&
          intrin->def.num_components > 2) {
         /* Need to load the layers from a const buffer */

         auto dest = vf.dest_vec4(intrin->def, pin_group);
         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo,
                                              dest,
                                              {0, 1, 7, 3},
                                              src,
                                              res_id,
                                              dyn_offset));

         shader.set_flag(Shader::sh_txs_cube_array_comp);

         if (const_offset) {
            unsigned lookup_resid = const_offset[0].u32 + shader.image_size_const_offset();
            shader.emit_instruction(
               new AluInstr(op1_mov,
                            dest[2],
                            vf.uniform(lookup_resid / 4 + R600_SHADER_BUFFER_INFO_SEL,
                                       lookup_resid % 4,
                                       R600_BUFFER_INFO_CONST_BUFFER),
                            AluInstr::last_write));
         } else {
            /* If the addressing is indirect we have to get the z-value by
             * using a binary search */
            auto addr = vf.temp_register();
            auto comp1 = vf.temp_register();
            auto comp2 = vf.temp_register();
            auto low_bit = vf.temp_register();
            auto high_bit = vf.temp_register();

            auto trgt = vf.temp_vec4(pin_group);

            shader.emit_instruction(new AluInstr(op2_lshr_int,
                                                 addr,
                                                 vf.src(intrin->src[0], 0),
                                                 vf.literal(2),
                                                 AluInstr::write));
            shader.emit_instruction(new AluInstr(op2_and_int,
                                                 low_bit,
                                                 vf.src(intrin->src[0], 0),
                                                 vf.one_i(),
                                                 AluInstr::write));
            shader.emit_instruction(new AluInstr(op2_and_int,
                                                 high_bit,
                                                 vf.src(intrin->src[0], 0),
                                                 vf.literal(2),
                                                 AluInstr::last_write));

            shader.emit_instruction(new LoadFromBuffer(trgt,
                                                       {0, 1, 2, 3},
                                                       addr,
                                                       R600_SHADER_BUFFER_INFO_SEL,
                                                       R600_BUFFER_INFO_CONST_BUFFER,
                                                       nullptr,
                                                       fmt_32_32_32_32_float));

            // this may be wrong
            shader.emit_instruction(new AluInstr(
               op3_cnde_int, comp1, high_bit, trgt[0], trgt[2], AluInstr::write));
            shader.emit_instruction(new AluInstr(
               op3_cnde_int, comp2, high_bit, trgt[1], trgt[3], AluInstr::last_write));
            shader.emit_instruction(new AluInstr(
               op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write));
         }
      } else {
         auto dest = vf.dest_vec4(intrin->def, pin_group);
         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo,
                                              dest,
                                              {0, 1, 2, 3},
                                              src,
                                              res_id,
                                              dyn_offset));
      }
   }
   return true;
}

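/* Emit image_samples: as used here, resinfo returns the sample count in its
 * last component, which is selected into the first channel of the temporary
 * and then moved into the destination. */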
bool
RatInstr::emit_image_samples(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto src = RegisterVec4(0, true, {4, 4, 4, 4});

   auto tmp = vf.temp_vec4(pin_group);
   auto dest = vf.dest(intrin->def, 0, pin_free);

   auto const_offset = nir_src_as_const_value(intrin->src[0]);
   PRegister dyn_offset = nullptr;

   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + nir_intrinsic_range_base(intrin);
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));

   shader.emit_instruction(new TexInstr(TexInstr::get_resinfo,
                                        tmp,
                                        {3, 7, 7, 7},
                                        src,
                                        res_id,
                                        dyn_offset));

   shader.emit_instruction(new AluInstr(op1_mov, dest, tmp[0], AluInstr::last_write));
   return true;
}

} // namespace r600