#include "sfn_emitssboinstruction.h"

#include "sfn_instruction_fetch.h"
#include "sfn_instruction_gds.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_tex.h"
#include "../r600_pipe.h"
#include "../r600_asm.h"

namespace r600 {

#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)

EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor):
   EmitInstruction(processor),
   m_require_rat_return_address(false),
   m_ssbo_image_offset(0)
{
}

void EmitSSBOInstruction::set_ssbo_offset(int offset)
{
   m_ssbo_image_offset = offset;
}

void EmitSSBOInstruction::set_require_rat_return_address()
{
   m_require_rat_return_address = true;
}

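/* Compute a per-lane address for reading back RAT return data: the lane index
 * within the wave (from MBCNT) is combined with the SE and HW wave IDs so that
 * every invocation gets its own slot. Done lazily, only when a RAT return
 * value was requested. */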
bool
EmitSSBOInstruction::load_rat_return_address()
{
   if (m_require_rat_return_address) {
      m_rat_return_address = get_temp_vec4();
      emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
      emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write}));
      emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
                                          literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr}));
      emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
                                          m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
                                          {alu_write, alu_last_instr}));
      m_require_rat_return_address = false;
   }
   return true;
}

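/* Entry point: dispatch the SSBO, image, and atomic counter intrinsics handled
 * by this class to the matching emit helpers. */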
bool EmitSSBOInstruction::do_emit(nir_instr* instr)
{
   const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_comp_swap:
      return emit_atomic(intr);
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
      return emit_unary_atomic(intr);
   case nir_intrinsic_atomic_counter_inc:
      return emit_atomic_inc(intr);
   case nir_intrinsic_atomic_counter_pre_dec:
      return emit_atomic_pre_dec(intr);
   case nir_intrinsic_load_ssbo:
      return emit_load_ssbo(intr);
   case nir_intrinsic_store_ssbo:
      return emit_store_ssbo(intr);
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_exchange:
      return emit_ssbo_atomic_op(intr);
   case nir_intrinsic_image_store:
      return emit_image_store(intr);
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_imax:
      return emit_image_load(intr);
   case nir_intrinsic_image_size:
      return emit_image_size(intr);
   case nir_intrinsic_get_ssbo_size:
      return emit_buffer_size(intr);
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_group_memory_barrier:
      return make_stores_ack_and_waitack();
   default:
      return false;
   }
}

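/* Atomic counter operations are emitted as GDS instructions; if the result of
 * the atomic is never read, the write-only opcode variant is selected. */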
bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr)
{
   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);

   ESDOp op = read_result ? get_opcode(instr->intrinsic) :
                            get_opcode_wo(instr->intrinsic);

   if (DS_OP_INVALID == op)
      return false;

   GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});

   int base = remap_atomic_base(nir_intrinsic_base(instr));

   PValue uav_id = from_nir(instr->src[0], 0);

   PValue value = from_nir_with_fetch_constant(instr->src[1], 0);

   GDSInstr *ir = nullptr;
   if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap) {
      PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0);
      ir = new GDSInstr(op, dest, value, value2, uav_id, base);
   } else {
      ir = new GDSInstr(op, dest, value, uav_id, base);
   }

   emit_instruction(ir);
   return true;
}

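/* Counter read and post-decrement take no data operand, only the UAV id. */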
bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr)
{
   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);

   ESDOp op = read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic);

   if (DS_OP_INVALID == op)
      return false;

   GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});

   PValue uav_id = from_nir(instr->src[0], 0);

   auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr)));

   emit_instruction(ir);
   return true;
}

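/* Map atomic counter intrinsics to the GDS opcodes that return the old value;
 * get_opcode_wo below gives the corresponding write-only opcodes. */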
ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode) const
{
   switch (opcode) {
   case nir_intrinsic_atomic_counter_add:
      return DS_OP_ADD_RET;
   case nir_intrinsic_atomic_counter_and:
      return DS_OP_AND_RET;
   case nir_intrinsic_atomic_counter_exchange:
      return DS_OP_XCHG_RET;
   case nir_intrinsic_atomic_counter_inc:
      return DS_OP_INC_RET;
   case nir_intrinsic_atomic_counter_max:
      return DS_OP_MAX_UINT_RET;
   case nir_intrinsic_atomic_counter_min:
      return DS_OP_MIN_UINT_RET;
   case nir_intrinsic_atomic_counter_or:
      return DS_OP_OR_RET;
   case nir_intrinsic_atomic_counter_read:
      return DS_OP_READ_RET;
   case nir_intrinsic_atomic_counter_xor:
      return DS_OP_XOR_RET;
   case nir_intrinsic_atomic_counter_post_dec:
      return DS_OP_DEC_RET;
   case nir_intrinsic_atomic_counter_comp_swap:
      return DS_OP_CMP_XCHG_RET;
   case nir_intrinsic_atomic_counter_pre_dec:
   default:
      return DS_OP_INVALID;
   }
}

ESDOp EmitSSBOInstruction::get_opcode_wo(const nir_intrinsic_op opcode) const
{
   switch (opcode) {
   case nir_intrinsic_atomic_counter_add:
      return DS_OP_ADD;
   case nir_intrinsic_atomic_counter_and:
      return DS_OP_AND;
   case nir_intrinsic_atomic_counter_inc:
      return DS_OP_INC;
   case nir_intrinsic_atomic_counter_max:
      return DS_OP_MAX_UINT;
   case nir_intrinsic_atomic_counter_min:
      return DS_OP_MIN_UINT;
   case nir_intrinsic_atomic_counter_or:
      return DS_OP_OR;
   case nir_intrinsic_atomic_counter_xor:
      return DS_OP_XOR;
   case nir_intrinsic_atomic_counter_post_dec:
      return DS_OP_DEC;
   case nir_intrinsic_atomic_counter_comp_swap:
      return DS_OP_CMP_XCHG_RET;
   case nir_intrinsic_atomic_counter_exchange:
      return DS_OP_XCHG_RET;
   case nir_intrinsic_atomic_counter_pre_dec:
   default:
      return DS_OP_INVALID;
   }
}

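/* Map SSBO and image atomics to RAT opcodes; the *_RTN variants return the
 * previous value, the plain variants are used when the result is unused. */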
RatInstruction::ERatOp
EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
{
   switch (opcode) {
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_image_atomic_add:
      return RatInstruction::ADD_RTN;
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_image_atomic_and:
      return RatInstruction::AND_RTN;
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
      return RatInstruction::XCHG_RTN;
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_image_atomic_or:
      return RatInstruction::OR_RTN;
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
      return RatInstruction::MIN_INT_RTN;
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
      return RatInstruction::MAX_INT_RTN;
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
      return RatInstruction::MIN_UINT_RTN;
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
      return RatInstruction::MAX_UINT_RTN;
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
      return RatInstruction::XOR_RTN;
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
      if (util_format_is_float(format))
         return RatInstruction::CMPXCHG_FLT_RTN;
      else
         return RatInstruction::CMPXCHG_INT_RTN;
   case nir_intrinsic_image_load:
      return RatInstruction::NOP_RTN;
   default:
      unreachable("Unsupported RAT instruction");
   }
}

RatInstruction::ERatOp
EmitSSBOInstruction::get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const
{
   switch (opcode) {
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_image_atomic_add:
      return RatInstruction::ADD;
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_image_atomic_and:
      return RatInstruction::AND;
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_image_atomic_or:
      return RatInstruction::OR;
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
      return RatInstruction::MIN_INT;
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
      return RatInstruction::MAX_INT;
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
      return RatInstruction::MIN_UINT;
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
      return RatInstruction::MAX_UINT;
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
      return RatInstruction::XOR;
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
      if (util_format_is_float(format))
         return RatInstruction::CMPXCHG_FLT;
      else
         return RatInstruction::CMPXCHG_INT;
   default:
      unreachable("Unsupported WO RAT instruction");
   }
}

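/* Preload the constant 1 into a long-lived register; it is used as the data
 * operand for atomic counter increment and pre-decrement. */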
bool EmitSSBOInstruction::load_atomic_inc_limits()
{
   m_atomic_update = get_temp_register();
   m_atomic_update->set_keep_alive();
   emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1),
                                       {alu_write, alu_last_instr}));
   return true;
}

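/* Increment is a GDS ADD of the preloaded constant held in m_atomic_update. */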
bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
{
   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
   PValue uav_id = from_nir(instr->src[0], 0);
   GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
   auto ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest,
                          m_atomic_update, uav_id,
                          remap_atomic_base(nir_intrinsic_base(instr)));
   emit_instruction(ir);
   return true;
}

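/* GDS SUB_RET returns the value before the subtraction, so subtract one from
 * the result to obtain the pre-decrement semantics. */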
bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
{
   GPRVector dest = make_dest(instr);

   PValue uav_id = from_nir(instr->src[0], 0);

   auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
                          remap_atomic_base(nir_intrinsic_base(instr)));
   emit_instruction(ir);

   emit_instruction(new AluInstruction(op2_sub_int, dest.x(), dest.x(), literal(1), last_write));

   return true;
}

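/* SSBO loads go through the texture cache as vertex fetches; the byte offset
 * is converted to a dword index and unused destination components are masked
 * with swizzle 7. */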
bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr)
{
   GPRVector dest = make_dest(instr);

   /** src0 not used, should be some offset */
   auto addr = from_nir(instr->src[1], 0);
   PValue addr_temp = create_register_from_nir_src(instr->src[1], 1);

   /** Should be lowered in nir */
   emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))},
                                       {alu_write, alu_last_instr}));

   const EVTXDataFormat formats[4] = {
      fmt_32,
      fmt_32_32,
      fmt_32_32_32,
      fmt_32_32_32_32
   };

   const std::array<int,4> dest_swt[4] = {
      {0,7,7,7},
      {0,1,7,7},
      {0,1,2,7},
      {0,1,2,3}
   };

   /* TODO fix resource index */
   auto ir = new FetchInstruction(dest, addr_temp,
                                  R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset,
                                  from_nir(instr->src[0], 0),
                                  formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int);
   ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]);
   ir->set_flag(vtx_use_tc);

   emit_instruction(ir);
   return true;
}

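/* Emit the store as a series of dword-sized RAT stores: the first component at
 * the dword address, each following component at the next address. The emitted
 * stores are recorded so that a later memory barrier can make them acked and
 * wait for the acks. */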
bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
{
   GPRVector::Swizzle swz = {7,7,7,7};
   for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i)
      swz[i] = i;

   auto orig_addr = from_nir(instr->src[2], 0);

   GPRVector addr_vec = get_temp_vec4({0,1,2,7});

   auto temp2 = get_temp_vec4();

   auto rat_id = from_nir(instr->src[1], 0);

   emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr,
                                       PValue(new LiteralValue(2)), write));
   emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write));
   emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write));

   auto values = vec_from_nir_with_fetch_constant(instr->src[0],
                                                  (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true);

   auto cf_op = cf_mem_rat;
   //auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
   auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
                                   values, addr_vec, m_ssbo_image_offset, rat_id, 1,
                                   1, 0, false);
   emit_instruction(store);
   m_store_ops.push_back(store);

   for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) {
      emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), get_chip_class() == CAYMAN ? last_write : write));
      emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
                                          {addr_vec.reg_i(0), Value::one_i}, last_write));
      store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
                                 temp2, addr_vec, m_ssbo_image_offset, rat_id, 1,
                                 1, 0, false);
      emit_instruction(store);
      if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT))
         m_store_ops.push_back(store);
   }

   return true;
}

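/* Typed image stores map directly to a RAT STORE_TYPED; for 1D array images
 * the layer index is moved into the z coordinate. */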
bool
EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
{
   int imageid = 0;
   PValue image_offset;

   if (nir_src_is_const(intrin->src[0]))
      imageid = nir_src_as_int(intrin->src[0]);
   else
      image_offset = from_nir(intrin->src[0], 0);

   auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3});
   auto undef = from_nir(intrin->src[2], 0);
   auto value = vec_from_nir_with_fetch_constant(intrin->src[3], 0xf, {0,1,2,3});
   auto unknown = from_nir(intrin->src[4], 0);

   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin)) {
      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
   }

   auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
   auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid,
                                   image_offset, 1, 0xf, 0, false);

   //if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT))
   m_store_ops.push_back(store);

   emit_instruction(store);
   return true;
}

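/* SSBO atomics: the source data is staged in the RAT return address register,
 * the byte offset is converted to a dword address, and the value written back
 * by the RAT op is fetched through the texture cache if it is actually used. */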
bool
EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
{
   int imageid = 0;
   PValue image_offset;

   if (nir_src_is_const(intrin->src[0]))
      imageid = nir_src_as_int(intrin->src[0]);
   else
      image_offset = from_nir(intrin->src[0], 0);

   bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
   auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) :
                               get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);

   auto coord_orig = from_nir(intrin->src[1], 0, 0);
   auto coord = get_temp_register(0);

   emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write));

   if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
                                          from_nir(intrin->src[3], 0), {alu_write}));
      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
                                          from_nir(intrin->src[2], 0), {alu_last_instr, alu_write}));
   } else {
      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
                                          from_nir(intrin->src[2], 0), {alu_write}));
      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write));
   }

   GPRVector out_vec({coord, coord, coord, coord});

   auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset,
                                    image_offset, 1, 0xf, 0, true);
   emit_instruction(atomic);

   if (read_result) {
      emit_instruction(new WaitAck(0));

      GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components);
      auto fetch = new FetchInstruction(vc_fetch,
                                        no_index_offset,
                                        fmt_32,
                                        vtx_nf_int,
                                        vtx_es_none,
                                        m_rat_return_address.reg_i(1),
                                        dest,
                                        0,
                                        false,
                                        0xf,
                                        R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                                        0,
                                        bim_none,
                                        false,
                                        false,
                                        0,
                                        0,
                                        0,
                                        image_offset,
                                        {0,7,7,7});
      fetch->set_flag(vtx_srf_mode);
      fetch->set_flag(vtx_use_tc);
      fetch->set_flag(vtx_vpm);
      emit_instruction(fetch);
   }

   return true;
}

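/* Image loads and image atomics share one path: a RAT op (NOP_RTN for plain
 * loads) writes the result to the return buffer, which is then fetched if the
 * destination is used. */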
bool
EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
{
   int imageid = 0;
   PValue image_offset;

   if (nir_src_is_const(intrin->src[0]))
      imageid = nir_src_as_int(intrin->src[0]);
   else
      image_offset = from_nir(intrin->src[0], 0);

   bool read_retvalue = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
   auto rat_op = read_retvalue ? get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin)) :
                                 get_rat_opcode_wo(intrin->intrinsic, nir_intrinsic_format(intrin));

   GPRVector::Swizzle swz = {0,1,2,3};
   auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);

   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin)) {
      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
   }

   if (intrin->intrinsic != nir_intrinsic_image_load) {
      if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
         emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
                                             from_nir(intrin->src[4], 0), {alu_write}));
         emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
                                             from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
      } else {
         emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
                                             from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
      }
   }
   auto cf_op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;

   auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid,
                                   image_offset, 1, 0xf, 0, true);
   emit_instruction(store);
   return read_retvalue ? fetch_return_value(intrin) : true;
}

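/* Read the result of the previous RAT operation back from the return buffer,
 * using a fetch format derived from the image format. */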
bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
{
   emit_instruction(new WaitAck(0));

   pipe_format format = nir_intrinsic_format(intrin);
   unsigned fmt = fmt_32;
   unsigned num_format = 0;
   unsigned format_comp = 0;
   unsigned endian = 0;

   int imageid = 0;
   PValue image_offset;

   if (nir_src_is_const(intrin->src[0]))
      imageid = nir_src_as_int(intrin->src[0]);
   else
      image_offset = from_nir(intrin->src[0], 0);

   r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);

   GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));

   auto fetch = new FetchInstruction(vc_fetch,
                                     no_index_offset,
                                     (EVTXDataFormat)fmt,
                                     (EVFetchNumFormat)num_format,
                                     (EVFetchEndianSwap)endian,
                                     m_rat_return_address.reg_i(1),
                                     dest,
                                     0,
                                     false,
                                     0x3,
                                     R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                                     0,
                                     bim_none,
                                     false,
                                     false,
                                     0,
                                     0,
                                     0,
                                     image_offset, {0,1,2,3});
   fetch->set_flag(vtx_srf_mode);
   fetch->set_flag(vtx_use_tc);
   fetch->set_flag(vtx_vpm);
   if (format_comp)
      fetch->set_flag(vtx_format_comp_signed);

   emit_instruction(fetch);
   return true;
}

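/* Image size queries: buffer images use a plain resource fetch, other
 * dimensions use get_resinfo. For cube arrays the layer count is read from the
 * buffer info constant buffer. */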
bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin)
{
   GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
   GPRVector src{0,{4,4,4,4}};

   assert(nir_src_as_uint(intrin->src[1]) == 0);

   auto const_offset = nir_src_as_const_value(intrin->src[0]);
   auto dyn_offset = PValue();
   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      dyn_offset = from_nir(intrin->src[0], 0);

   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
      emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)),
                                            res_id,
                                            bim_none));
      return true;
   } else {
      emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src,
                                          0 /* ?? */,
                                          res_id, dyn_offset));
      if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
          nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
         /* Need to load the layers from a const buffer */

         set_has_txs_cube_array_comp();

         if (const_offset) {
            unsigned lookup_resid = const_offset[0].u32;
            emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2),
                                                PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
                                                                        R600_BUFFER_INFO_CONST_BUFFER)),
                                                EmitInstruction::last_write));
         } else {
            /* If the addressing is indirect we have to get the z-value by using a binary search */
            GPRVector trgt;
            GPRVector help;

            auto addr = help.reg_i(0);
            auto comp = help.reg_i(1);
            auto low_bit = help.reg_i(2);
            auto high_bit = help.reg_i(3);

            emit_instruction(new AluInstruction(op2_lshr_int, addr, from_nir(intrin->src[0], 0),
                                                literal(2), EmitInstruction::write));
            emit_instruction(new AluInstruction(op2_and_int, comp, from_nir(intrin->src[0], 0),
                                                literal(3), EmitInstruction::last_write));

            emit_instruction(new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, R600_SHADER_BUFFER_INFO_SEL,
                                                  R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none));

            emit_instruction(new AluInstruction(op3_cnde_int, comp, high_bit, trgt.reg_i(0), trgt.reg_i(2),
                                                EmitInstruction::write));
            emit_instruction(new AluInstruction(op3_cnde_int, high_bit, high_bit, trgt.reg_i(1), trgt.reg_i(3),
                                                EmitInstruction::last_write));

            emit_instruction(new AluInstruction(op3_cnde_int, dest.reg_i(2), low_bit, comp, high_bit, EmitInstruction::last_write));
         }
      }
   }
   return true;
}

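/* SSBO size queries are answered with a fetch from the buffer resource; only
 * constant buffer indices are supported here. */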
bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr)
{
   std::array<PValue,4> dst_elms;

   for (uint16_t i = 0; i < 4; ++i) {
      dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? i : 7);
   }

   GPRVector dst(dst_elms);
   GPRVector src(0,{4,4,4,4});

   auto const_offset = nir_src_as_const_value(intr->src[0]);
   auto dyn_offset = PValue();
   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      assert(0 && "dynamic buffer offset not supported in buffer_size");

   emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
                                         res_id, bim_none));

   return true;
}

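/* On a memory barrier, flag all pending stores to request an ack and emit a
 * WAIT_ACK so that later accesses observe the written data. */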
bool EmitSSBOInstruction::make_stores_ack_and_waitack()
{
   for (auto&& store: m_store_ops)
      store->set_ack();

   if (!m_store_ops.empty())
      emit_instruction(new WaitAck(0));

   m_store_ops.clear();

   return true;
}

GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
{
   GPRVector::Values v;
   int i;
   for (i = 0; i < 4; ++i)
      v[i] = from_nir(ir->dest, i);
   return GPRVector(v);
}

}