/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_instr_mem.h"
#include "sfn_instr_fetch.h"
#include "sfn_instr_tex.h"
#include "sfn_shader.h"

namespace r600 {

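/* A GDSInstr implements an atomic counter operation on the GDS (global
 * data share). dest receives the value returned by the operation, src
 * holds the source operands, and uav_base plus the optional uav_id
 * register select the counter buffer (uav_id is only set when the
 * binding is addressed indirectly). */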
GDSInstr::GDSInstr(ESDOp op, Register *dest,
                   const RegisterVec4& src, int uav_base,
                   PRegister uav_id):
   m_op(op),
   m_dest(dest),
   m_src(src),
   m_uav_base(uav_base),
   m_uav_id(uav_id)
{
   set_always_keep();

   m_src.add_use(this);
   m_dest->add_parent(this);

   if (m_uav_id)
      m_uav_id->add_use(this);
}

bool GDSInstr::is_equal_to(const GDSInstr& rhs) const
{
#define NE(X) (X != rhs.X)

   if (NE(m_op) ||
       NE(m_src) ||
       NE(m_uav_base))
      return false;

   return sfn_value_equal(m_dest, rhs.m_dest) &&
          sfn_value_equal(m_uav_id, rhs.m_uav_id);
}

void GDSInstr::accept(ConstInstrVisitor& visitor) const
{
   visitor.visit(*this);
}

void GDSInstr::accept(InstrVisitor& visitor)
{
   visitor.visit(this);
}

bool GDSInstr::do_ready() const
{
   return m_src.ready(block_id(), index()) &&
          (!m_uav_id || m_uav_id->ready(block_id(), index()));
}

void GDSInstr::do_print(std::ostream& os) const
{
   os << "GDS " << lds_ops.at(m_op).name
      << *m_dest;
   os << " " << m_src;
   os << " BASE:" << m_uav_base;

   if (m_uav_id)
      os << " UAV:" << *m_uav_id;
}

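/* Entry point for lowering NIR atomic counter intrinsics. Two-operand
 * atomics, plain reads, increments, and pre-decrements each take a
 * separate path below; e.g. a GLSL atomicCounterIncrement() arrives here
 * as nir_intrinsic_atomic_counter_inc. */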
bool GDSInstr::emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_comp_swap:
      return emit_atomic_op2(intr, shader);
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
      return emit_atomic_read(intr, shader);
   case nir_intrinsic_atomic_counter_inc:
      return emit_atomic_inc(intr, shader);
   case nir_intrinsic_atomic_counter_pre_dec:
      return emit_atomic_pre_dec(intr, shader);
   default:
      return false;
   }
}

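/* Map an atomic counter intrinsic to the GDS opcode that returns the
 * previous value (the *_RET forms). pre_dec has no direct equivalent and
 * is synthesized in emit_atomic_pre_dec() instead. */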
static ESDOp get_opcode(const nir_intrinsic_op opcode)
{
   switch (opcode) {
   case nir_intrinsic_atomic_counter_add:
      return DS_OP_ADD_RET;
   case nir_intrinsic_atomic_counter_and:
      return DS_OP_AND_RET;
   case nir_intrinsic_atomic_counter_exchange:
      return DS_OP_XCHG_RET;
   case nir_intrinsic_atomic_counter_inc:
      return DS_OP_INC_RET;
   case nir_intrinsic_atomic_counter_max:
      return DS_OP_MAX_UINT_RET;
   case nir_intrinsic_atomic_counter_min:
      return DS_OP_MIN_UINT_RET;
   case nir_intrinsic_atomic_counter_or:
      return DS_OP_OR_RET;
   case nir_intrinsic_atomic_counter_read:
      return DS_OP_READ_RET;
   case nir_intrinsic_atomic_counter_xor:
      return DS_OP_XOR_RET;
   case nir_intrinsic_atomic_counter_post_dec:
      return DS_OP_DEC_RET;
   case nir_intrinsic_atomic_counter_comp_swap:
      return DS_OP_CMP_XCHG_RET;
   case nir_intrinsic_atomic_counter_pre_dec:
   default:
      return DS_OP_INVALID;
   }
}

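/* Same mapping, but to the write-only opcodes used when the result of the
 * operation is never read. exchange and comp_swap still return the *_RET
 * forms, presumably because no write-only variant of these operations
 * exists. */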
static ESDOp get_opcode_wo(const nir_intrinsic_op opcode)
{
   switch (opcode) {
   case nir_intrinsic_atomic_counter_add:
      return DS_OP_ADD;
   case nir_intrinsic_atomic_counter_and:
      return DS_OP_AND;
   case nir_intrinsic_atomic_counter_inc:
      return DS_OP_INC;
   case nir_intrinsic_atomic_counter_max:
      return DS_OP_MAX_UINT;
   case nir_intrinsic_atomic_counter_min:
      return DS_OP_MIN_UINT;
   case nir_intrinsic_atomic_counter_or:
      return DS_OP_OR;
   case nir_intrinsic_atomic_counter_xor:
      return DS_OP_XOR;
   case nir_intrinsic_atomic_counter_post_dec:
      return DS_OP_DEC;
   case nir_intrinsic_atomic_counter_comp_swap:
      return DS_OP_CMP_XCHG_RET;
   case nir_intrinsic_atomic_counter_exchange:
      return DS_OP_XCHG_RET;
   case nir_intrinsic_atomic_counter_pre_dec:
   default:
      return DS_OP_INVALID;
   }
}

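/* Emit a two-operand atomic counter operation. Before Cayman the counter
 * is addressed through the instruction (uav_base plus an optional
 * indirect uav_id register) and the source value is passed in the second
 * channel of the source vector; on Cayman the byte address is computed
 * into channel 0 of a register group and the source value goes into
 * channel 1. */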
bool GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);

   ESDOp op = read_result ? get_opcode(instr->intrinsic) :
                            get_opcode_wo(instr->intrinsic);

   if (DS_OP_INVALID == op)
      return false;

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += nir_intrinsic_base(instr);

   auto dest = vf.dest(instr->dest, 0, pin_free);

   PRegister src_as_register = nullptr;
   auto src_val = vf.src(instr->src[1], 0);
   if (!src_val->as_register()) {
      auto temp_src_val = vf.temp_register();
      shader.emit_instruction(new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write));
      src_as_register = temp_src_val;
   } else
      src_as_register = src_val->as_register();

   if (uav_id != nullptr)
      shader.set_flag(Shader::sh_indirect_atomic);

   GDSInstr *ir = nullptr;
   if (shader.chip_class() < ISA_CC_CAYMAN) {
      RegisterVec4 src(nullptr, src_as_register, nullptr, nullptr, pin_free);
      ir = new GDSInstr(op, dest, src, offset, uav_id);
   } else {
      auto dest = vf.dest(instr->dest, 0, pin_free);
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write));
      ir = new GDSInstr(op, dest, tmp, 0, nullptr);
   }
   shader.emit_instruction(ir);
   return true;
}

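/* Read the current counter value with DS_OP_READ_RET. There is no data
 * operand: before Cayman the source vector is fully masked out, on Cayman
 * only the computed address is passed in channel 0. */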
bool GDSInstr::emit_atomic_read(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   auto dest = vf.dest(instr->dest, 0, pin_free);

   GDSInstr *ir = nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      RegisterVec4 src = RegisterVec4(0, true, {7,7,7,7});
      ir = new GDSInstr(DS_OP_READ_RET, dest, src, offset, uav_id);
   } else {
      auto tmp = vf.temp_vec4(pin_group, {0, 7, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      ir = new GDSInstr(DS_OP_READ_RET, dest, tmp, 0, nullptr);
   }

   shader.emit_instruction(ir);
   return true;
}

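/* Increment a counter. The added operand comes from
 * shader.atomic_update(), which, judging by its use here and in
 * emit_atomic_pre_dec(), holds the per-invocation update value; the
 * write-only DS_OP_ADD is used when the NIR destination has no uses. */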
bool GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   GDSInstr *ir = nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      auto dest = vf.dest(instr->dest, 0, pin_free);
      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
      ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD,
                        dest, src, offset, uav_id);
   } else {
      auto dest = vf.dest(instr->dest, 0, pin_free);
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});

      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
      ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD,
                        dest, tmp, 0, nullptr);
   }
   shader.emit_instruction(ir);
   return true;
}

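/* There is no pre-decrement GDS opcode (get_opcode() maps pre_dec to
 * DS_OP_INVALID), so the counter is decremented with DS_OP_SUB_RET, which
 * returns the pre-op value, and an ALU subtract then derives the post-op
 * result the intrinsic expects. */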
bool GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   auto *tmp_dest = vf.temp_register();

   GDSInstr *ir = nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
      ir = new GDSInstr(DS_OP_SUB_RET, tmp_dest, src, offset, uav_id);
   } else {
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
      ir = new GDSInstr(DS_OP_SUB_RET, tmp_dest, tmp, 0, nullptr);
   }

   shader.emit_instruction(ir);
   shader.emit_instruction(new AluInstr(op2_sub_int, vf.dest(instr->dest, 0, pin_free),
                                        tmp_dest, vf.one_i(), AluInstr::last_write));
   return true;
}

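/* A RatInstr accesses memory through a RAT (random access target), which
 * backs SSBOs and writable images on this hardware. data holds the value
 * operands, index the target address, and rat_id (plus an optional
 * rat_id_offset register for indirect bindings) selects the RAT. */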
RatInstr::RatInstr(ECFOpCode cf_opcode, ERatOp rat_op,
                   const RegisterVec4& data, const RegisterVec4& index,
                   int rat_id, PRegister rat_id_offset,
                   int burst_count, int comp_mask, int element_size):
   m_cf_opcode(cf_opcode),
   m_rat_op(rat_op),
   m_data(data),
   m_index(index),
   m_rat_id_offset(rat_id_offset),
   m_rat_id(rat_id),
   m_burst_count(burst_count),
   m_comp_mask(comp_mask),
   m_element_size(element_size)
{
   set_always_keep();

   m_data.add_use(this);
   m_index.add_use(this);
   if (m_rat_id_offset)
      m_rat_id_offset->add_use(this);
}

void RatInstr::accept(ConstInstrVisitor& visitor) const
{
   visitor.visit(*this);
}

void RatInstr::accept(InstrVisitor& visitor)
{
   visitor.visit(this);
}

bool RatInstr::is_equal_to(const RatInstr& rhs) const
{
   (void)rhs;
   assert(0);
   return false;
}

bool RatInstr::do_ready() const
{
   if (m_rat_op != STORE_TYPED) {
      for (auto i: required_instr()) {
         if (!i->is_scheduled()) {
            return false;
         }
      }
   }

   return m_data.ready(block_id(), index()) &&
          m_index.ready(block_id(), index());
}

void RatInstr::do_print(std::ostream& os) const
{
   os << "MEM_RAT RAT " << m_rat_id;
   if (m_rat_id_offset)
      os << "+" << *m_rat_id_offset;
   os << " @" << m_index;
   os << " OP:" << m_rat_op << " " << m_data;
   os << " BC:" << m_burst_count
      << " MASK:" << m_comp_mask
      << " ES:" << m_element_size;
   if (m_need_ack)
      os << " ACK";
}

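/* Map SSBO and image atomic intrinsics to the RAT opcodes that return the
 * previous value (*_RTN); image loads become a NOP_RTN that only reads
 * data back. The format is only consulted to pick the float or integer
 * compare-exchange variant. */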
static RatInstr::ERatOp
get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format)
{
   switch (opcode) {
   case nir_intrinsic_image_load:
      return RatInstr::NOP_RTN;
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_image_atomic_add:
      return RatInstr::ADD_RTN;
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_image_atomic_and:
      return RatInstr::AND_RTN;
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_image_atomic_or:
      return RatInstr::OR_RTN;
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
      return RatInstr::MIN_INT_RTN;
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
      return RatInstr::MAX_INT_RTN;
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
      return RatInstr::MIN_UINT_RTN;
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
      return RatInstr::MAX_UINT_RTN;
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
      return RatInstr::XOR_RTN;
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
      if (util_format_is_float(format))
         return RatInstr::CMPXCHG_FLT_RTN;
      else
         return RatInstr::CMPXCHG_INT_RTN;
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
      return RatInstr::XCHG_RTN;
   default:
      unreachable("Unsupported RAT instruction");
   }
}

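/* Write-only variants for atomics whose result is never read; exchange
 * and compare-exchange again fall back to the returning forms, presumably
 * because no write-only encoding exists for them. */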
static RatInstr::ERatOp
get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format)
{
   switch (opcode) {
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_image_atomic_add:
      return RatInstr::ADD;
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_image_atomic_and:
      return RatInstr::AND;
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_image_atomic_or:
      return RatInstr::OR;
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
      return RatInstr::MIN_INT;
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
      return RatInstr::MAX_INT;
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
      return RatInstr::MIN_UINT;
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
      return RatInstr::MAX_UINT;
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
      return RatInstr::XOR;
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
      if (util_format_is_float(format))
         return RatInstr::CMPXCHG_FLT;
      else
         return RatInstr::CMPXCHG_INT;
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
      return RatInstr::XCHG_RTN;
   default:
      unreachable("Unsupported WO RAT instruction");
   }
}

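/* Entry point for lowering SSBO and image memory intrinsics to RAT,
 * fetch, and query instructions. */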
bool RatInstr::emit(nir_intrinsic_instr *intr, Shader& shader)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_ssbo:
      return emit_ssbo_load(intr, shader);
   case nir_intrinsic_store_ssbo:
      return emit_ssbo_store(intr, shader);
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_exchange:
      return emit_ssbo_atomic_op(intr, shader);
   case nir_intrinsic_image_store:
      return emit_image_store(intr, shader);
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_imax:
      return emit_image_load_or_atomic(intr, shader);
   case nir_intrinsic_image_size:
      return emit_image_size(intr, shader);
   case nir_intrinsic_get_ssbo_size:
      return emit_ssbo_size(intr, shader);
   default:
      return false;
   }
}

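/* SSBO loads are emitted as a buffer fetch through the TC. The byte
 * offset from NIR is converted to a dword index (the shift should really
 * be lowered in NIR, as the comment below notes), and the fetch format is
 * picked to match the number of components of the NIR destination. */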
bool RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
{
   auto &vf = shader.value_factory();
   auto dest = vf.dest_vec4(intr->dest, pin_group);

   /** src0 not used, should be some offset */
   auto addr = vf.src(intr->src[1], 0);
   auto addr_temp = vf.temp_register();

   /** Should be lowered in nir */
   shader.emit_instruction(new AluInstr(op2_lshr_int, addr_temp, addr, vf.literal(2),
                                        {alu_write, alu_last_instr}));

   const EVTXDataFormat formats[4] = {
      fmt_32,
      fmt_32_32,
      fmt_32_32_32,
      fmt_32_32_32_32
   };

   RegisterVec4::Swizzle dest_swz[4] = {
      {0,7,7,7},
      {0,1,7,7},
      {0,1,2,7},
      {0,1,2,3}
   };

   int comp_idx = nir_dest_num_components(intr->dest) - 1;

   auto [offset, res_offset] = shader.evaluate_resource_offset(intr, 0);

   auto res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + offset +
                 shader.ssbo_image_offset();

   auto ir = new LoadFromBuffer(dest, dest_swz[comp_idx], addr_temp, 0,
                                res_id, res_offset, formats[comp_idx]);
   ir->set_fetch_flag(FetchInstr::use_tc);
   ir->set_num_format(vtx_nf_int);

   shader.emit_instruction(ir);
   return true;
}

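/* SSBO stores are split into one typed RAT store per component: each
 * iteration computes the dword address for component i and writes a
 * single-channel value vector. */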
bool RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
{
   auto &vf = shader.value_factory();
   auto orig_addr = vf.src(instr->src[2], 0);

   auto addr_base = vf.temp_register();

   auto [offset, rat_id] = shader.evaluate_resource_offset(instr, 1);

   shader.emit_instruction(new AluInstr(op2_lshr_int, addr_base, orig_addr,
                                        vf.literal(2), AluInstr::write));

   for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) {
      auto addr_vec = vf.temp_vec4(pin_group, {0,1,2,7});
      if (i == 0) {
         shader.emit_instruction(new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::last_write));
      } else {
         shader.emit_instruction(new AluInstr(op2_add_int, addr_vec[0], addr_base,
                                              vf.literal(i),
                                              AluInstr::last_write));
      }
      auto value = vf.src(instr->src[0], i);
      PRegister v = vf.temp_register(0);
      shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::last_write));
      auto value_vec = RegisterVec4(v, nullptr, nullptr, nullptr, pin_chan);
      auto store = new RatInstr(cf_mem_rat, RatInstr::STORE_TYPED,
                                value_vec, addr_vec, offset + shader.ssbo_image_offset(),
                                rat_id, 1, 1, 0);
      shader.emit_instruction(store);
   }

   return true;
}

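/* SSBO atomics: the new value goes into data channel 0 (with the compare
 * value for comp_swap in channel 2 on Cayman and channel 3 otherwise),
 * channel 1 carries the RAT return address, and if the NIR destination is
 * used the old value is read back with a fetch chained to the atomic. */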
bool RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intr, 0);

   bool read_result = !intr->dest.is_ssa || !list_is_empty(&intr->dest.ssa.uses);
   auto opcode = read_result ? get_rat_opcode(intr->intrinsic, PIPE_FORMAT_R32_UINT) :
                               get_rat_opcode_wo(intr->intrinsic, PIPE_FORMAT_R32_UINT);

   auto coord_orig = vf.src(intr->src[1], 0);
   auto coord = vf.temp_register(0);

   auto data_vec4 = vf.temp_vec4(pin_group, {0,1,2,3});

   shader.emit_instruction(new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write));

   shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));

   if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[3], 0), AluInstr::write));
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
                                           vf.src(intr->src[2], 0), {alu_last_instr, alu_write}));
   } else {
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write));
   }

   RegisterVec4 out_vec(coord, coord, coord, coord, pin_group);

   auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, out_vec, imageid + shader.ssbo_image_offset(),
                              image_offset, 1, 0xf, 0);
   shader.emit_instruction(atomic);

   atomic->set_ack();
   if (read_result) {
      atomic->set_instr_flag(ack_rat_return_write);
      auto dest = vf.dest_vec4(intr->dest, pin_group);

      auto fetch = new FetchInstr(vc_fetch,
                                  dest, {0, 1, 2, 3},
                                  shader.rat_return_address(),
                                  0,
                                  no_index_offset,
                                  fmt_32,
                                  vtx_nf_int,
                                  vtx_es_none,
                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                                  image_offset);
      fetch->set_mfc(15);
      fetch->set_fetch_flag(FetchInstr::srf_mode);
      fetch->set_fetch_flag(FetchInstr::use_tc);
      fetch->set_fetch_flag(FetchInstr::vpm);
      fetch->set_fetch_flag(FetchInstr::wait_ack);
      fetch->add_required_instr(atomic);
      shader.chain_ssbo_read(fetch);
      shader.emit_instruction(fetch);
   }

   return true;
}

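/* get_ssbo_size: the buffer size is queried with a QueryBufferSizeInstr;
 * only a constant buffer index is supported. */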
bool RatInstr::emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto dest = vf.dest_vec4(intr->dest, pin_group);

   auto const_offset = nir_src_as_const_value(intr->src[0]);
   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      assert(0 && "dynamic buffer offset not supported in buffer_size");

   shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,1,2,3}, res_id));
   return true;
}

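/* Image stores copy the coordinates and values into pinned register
 * groups, moving the array layer from the second to the third coordinate
 * channel for 1D array images, and emit a typed RAT store that is acked
 * when the access is coherent. */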
bool RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);

   auto coord_load = vf.src_vec4(intrin->src[1], pin_chan);
   auto coord = vf.temp_vec4(pin_group);

   auto value_load = vf.src_vec4(intrin->src[3], pin_chan);
   auto value = vf.temp_vec4(pin_group);

   RegisterVec4::Swizzle swizzle = {0,1,2,3};
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin))
      swizzle = {0,2,1,3};

   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags));
   }
   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags));
   }

   auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
   auto store = new RatInstr(op, RatInstr::STORE_TYPED, value, coord, imageid,
                             image_offset, 1, 0xf, 0);

   if (nir_intrinsic_access(intrin) & ACCESS_COHERENT)
      store->set_ack();
   shader.emit_instruction(store);
   return true;
}

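/* Image loads and atomics share one path: the coordinates are swizzled as
 * for stores, the RAT return address goes into data channel 1, and when
 * the result is used it is fetched back in the format reported by
 * nir_intrinsic_format(). */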
bool RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);

   bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
   auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) :
                               get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);

   auto coord_orig = vf.src_vec4(intrin->src[1], pin_chan);
   auto coord = vf.temp_vec4(pin_group);

   auto data_vec4 = vf.temp_vec4(pin_group, {0,1,2,3});

   RegisterVec4::Swizzle swizzle = {0,1,2,3};
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin))
      swizzle = {0,2,1,3};

   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags));
   }

   shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));

   if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[4], 0), AluInstr::write));
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
                                           vf.src(intrin->src[3], 0), AluInstr::last_write));
   } else {
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0],
                                           vf.src(intrin->src[3], 0), AluInstr::write));
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write));
   }

   auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, coord, imageid,
                              image_offset, 1, 0xf, 0);
   shader.emit_instruction(atomic);

   atomic->set_ack();
   if (read_result) {
      atomic->set_instr_flag(ack_rat_return_write);
      auto dest = vf.dest_vec4(intrin->dest, pin_group);

      pipe_format format = nir_intrinsic_format(intrin);
      unsigned fmt = fmt_32;
      unsigned num_format = 0;
      unsigned format_comp = 0;
      unsigned endian = 0;
      r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);

      auto fetch = new FetchInstr(vc_fetch,
                                  dest, {0, 1, 2, 3},
                                  shader.rat_return_address(),
                                  0,
                                  no_index_offset,
                                  (EVTXDataFormat)fmt,
                                  (EVFetchNumFormat)num_format,
                                  (EVFetchEndianSwap)endian,
                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                                  image_offset);
      fetch->set_mfc(3);
      fetch->set_fetch_flag(FetchInstr::srf_mode);
      fetch->set_fetch_flag(FetchInstr::use_tc);
      fetch->set_fetch_flag(FetchInstr::vpm);
      fetch->set_fetch_flag(FetchInstr::wait_ack);
      if (format_comp)
         fetch->set_fetch_flag(FetchInstr::format_comp_signed);

      shader.chain_ssbo_read(fetch);
      shader.emit_instruction(fetch);
   }

   return true;
}

#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)

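/* Image size queries: buffer images use QueryBufferSizeInstr, all other
 * targets a get_resinfo texture instruction. For cube arrays the layer
 * count is instead loaded from the buffer-info constant buffer: directly
 * when the image index is constant, otherwise by selecting among the
 * four components of a buffer load with conditional moves. */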
bool RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto src = RegisterVec4(0, true, {4,4,4,4});

   assert(nir_src_as_uint(intrin->src[1]) == 0);

   auto const_offset = nir_src_as_const_value(intrin->src[0]);
   PRegister dyn_offset = nullptr;

   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));

   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
      auto dest = vf.dest_vec4(intrin->dest, pin_group);
      shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,1,2,3}, res_id));
      return true;
   } else {
      if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
          nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
         /* Need to load the layers from a const buffer */

         auto dest = vf.dest_vec4(intrin->dest, pin_group);
         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0,1,7,3},
                                              src, 0/* ?? */, res_id, dyn_offset));

         shader.set_flag(Shader::sh_txs_cube_array_comp);

         if (const_offset) {
            unsigned lookup_resid = const_offset[0].u32;
            shader.emit_instruction(new AluInstr(op1_mov, dest[2],
                                                 vf.uniform(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
                                                            R600_BUFFER_INFO_CONST_BUFFER),
                                                 AluInstr::last_write));
         } else {
            /* If the addressing is indirect we have to get the z-value by using a binary search */
            auto addr = vf.temp_register();
            auto comp1 = vf.temp_register();
            auto comp2 = vf.temp_register();
            auto low_bit = vf.temp_register();
            auto high_bit = vf.temp_register();

            auto trgt = vf.temp_vec4(pin_group);

            shader.emit_instruction(new AluInstr(op2_lshr_int, addr, vf.src(intrin->src[0], 0),
                                                 vf.literal(2), AluInstr::write));
            shader.emit_instruction(new AluInstr(op2_and_int, low_bit, vf.src(intrin->src[0], 0),
                                                 vf.one_i(), AluInstr::write));
            shader.emit_instruction(new AluInstr(op2_and_int, high_bit, vf.src(intrin->src[0], 0),
                                                 vf.literal(2), AluInstr::last_write));

            shader.emit_instruction(new LoadFromBuffer(trgt, {0, 1, 2, 3}, addr, R600_SHADER_BUFFER_INFO_SEL,
                                                       R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float));

            // this may be wrong
            shader.emit_instruction(new AluInstr(op3_cnde_int, comp1, high_bit, trgt[0], trgt[2],
                                                 AluInstr::write));
            shader.emit_instruction(new AluInstr(op3_cnde_int, comp2, high_bit, trgt[1], trgt[3],
                                                 AluInstr::last_write));
            shader.emit_instruction(new AluInstr(op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write));
         }
      } else {
         auto dest = vf.dest_vec4(intrin->dest, pin_group);
         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0,1,2,3},
                                              src, 0/* ?? */, res_id, dyn_offset));
      }
   }
   return true;
}

} // namespace r600