• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "sfn_emitssboinstruction.h"
2 
3 #include "sfn_instruction_fetch.h"
4 #include "sfn_instruction_gds.h"
5 #include "sfn_instruction_misc.h"
6 #include "sfn_instruction_tex.h"
7 #include "../r600_pipe.h"
8 #include "../r600_asm.h"
9 
10 namespace r600 {
11 
12 #define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
13 
EmitSSBOInstruction(ShaderFromNirProcessor & processor)14 EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor):
15    EmitInstruction(processor),
16    m_require_rat_return_address(false),
17    m_ssbo_image_offset(0)
18 {
19 }
20 
set_ssbo_offset(int offset)21 void EmitSSBOInstruction::set_ssbo_offset(int offset)
22 {
23    m_ssbo_image_offset = offset;
24 }
25 
26 
set_require_rat_return_address()27 void EmitSSBOInstruction::set_require_rat_return_address()
28 {
29    m_require_rat_return_address = true;
30 }
31 
32 bool
load_rat_return_address()33 EmitSSBOInstruction::load_rat_return_address()
34 {
35    if (m_require_rat_return_address) {
36       m_rat_return_address = get_temp_vec4();
37       emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
38       emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write}));
39       emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
40                                           literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr}));
41       emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
42                                           m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
43       {alu_write, alu_last_instr}));
44       m_require_rat_return_address = false;
45    }
46    return true;
47 }
48 
49 
do_emit(nir_instr * instr)50 bool EmitSSBOInstruction::do_emit(nir_instr* instr)
51 {
52    const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
53    switch (intr->intrinsic) {
54    case nir_intrinsic_atomic_counter_add:
55    case nir_intrinsic_atomic_counter_and:
56    case nir_intrinsic_atomic_counter_exchange:
57    case nir_intrinsic_atomic_counter_max:
58    case nir_intrinsic_atomic_counter_min:
59    case nir_intrinsic_atomic_counter_or:
60    case nir_intrinsic_atomic_counter_xor:
61    case nir_intrinsic_atomic_counter_comp_swap:
62       return emit_atomic(intr);
63    case nir_intrinsic_atomic_counter_read:
64    case nir_intrinsic_atomic_counter_post_dec:
65       return emit_unary_atomic(intr);
66    case nir_intrinsic_atomic_counter_inc:
67       return emit_atomic_inc(intr);
68    case nir_intrinsic_atomic_counter_pre_dec:
69       return emit_atomic_pre_dec(intr);
70    case nir_intrinsic_load_ssbo:
71        return emit_load_ssbo(intr);
72    case nir_intrinsic_store_ssbo:
73       return emit_store_ssbo(intr);
74    case nir_intrinsic_ssbo_atomic_add:
75    case nir_intrinsic_ssbo_atomic_comp_swap:
76    case nir_intrinsic_ssbo_atomic_or:
77    case nir_intrinsic_ssbo_atomic_xor:
78    case nir_intrinsic_ssbo_atomic_imax:
79    case nir_intrinsic_ssbo_atomic_imin:
80    case nir_intrinsic_ssbo_atomic_umax:
81    case nir_intrinsic_ssbo_atomic_umin:
82    case nir_intrinsic_ssbo_atomic_and:
83    case nir_intrinsic_ssbo_atomic_exchange:
84       return emit_ssbo_atomic_op(intr);
85    case nir_intrinsic_image_store:
86       return emit_image_store(intr);
87    case nir_intrinsic_image_load:
88    case nir_intrinsic_image_atomic_add:
89    case nir_intrinsic_image_atomic_and:
90    case nir_intrinsic_image_atomic_or:
91    case nir_intrinsic_image_atomic_xor:
92    case nir_intrinsic_image_atomic_exchange:
93    case nir_intrinsic_image_atomic_comp_swap:
94    case nir_intrinsic_image_atomic_umin:
95    case nir_intrinsic_image_atomic_umax:
96    case nir_intrinsic_image_atomic_imin:
97    case nir_intrinsic_image_atomic_imax:
98       return emit_image_load(intr);
99    case nir_intrinsic_image_size:
100       return emit_image_size(intr);
101    case nir_intrinsic_get_ssbo_size:
102       return emit_buffer_size(intr);
103    case nir_intrinsic_memory_barrier:
104    case nir_intrinsic_memory_barrier_image:
105    case nir_intrinsic_memory_barrier_buffer:
106    case nir_intrinsic_group_memory_barrier:
107       return make_stores_ack_and_waitack();
108    default:
109       return false;
110    }
111 }
112 
emit_atomic(const nir_intrinsic_instr * instr)113 bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr)
114 {
115    bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
116 
117    ESDOp op = read_result ? get_opcode(instr->intrinsic) :
118                             get_opcode_wo(instr->intrinsic);
119 
120    if (DS_OP_INVALID == op)
121       return false;
122 
123 
124 
125    GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
126 
127    int base = remap_atomic_base(nir_intrinsic_base(instr));
128 
129    PValue uav_id = from_nir(instr->src[0], 0);
130 
131    PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
132 
133    GDSInstr *ir = nullptr;
134    if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap)  {
135       PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0);
136       ir = new GDSInstr(op, dest, value, value2, uav_id, base);
137    } else {
138       ir = new GDSInstr(op, dest, value, uav_id, base);
139    }
140 
141    emit_instruction(ir);
142    return true;
143 }
144 
emit_unary_atomic(const nir_intrinsic_instr * instr)145 bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr)
146 {
147    bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
148 
149    ESDOp op = read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic);
150 
151    if (DS_OP_INVALID == op)
152       return false;
153 
154    GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
155 
156    PValue uav_id = from_nir(instr->src[0], 0);
157 
158    auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr)));
159 
160    emit_instruction(ir);
161    return true;
162 }
163 
get_opcode(const nir_intrinsic_op opcode) const164 ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode) const
165 {
166    switch (opcode) {
167    case nir_intrinsic_atomic_counter_add:
168       return DS_OP_ADD_RET;
169    case nir_intrinsic_atomic_counter_and:
170       return DS_OP_AND_RET;
171    case nir_intrinsic_atomic_counter_exchange:
172       return DS_OP_XCHG_RET;
173    case nir_intrinsic_atomic_counter_inc:
174       return DS_OP_INC_RET;
175    case nir_intrinsic_atomic_counter_max:
176       return DS_OP_MAX_UINT_RET;
177    case nir_intrinsic_atomic_counter_min:
178       return DS_OP_MIN_UINT_RET;
179    case nir_intrinsic_atomic_counter_or:
180       return DS_OP_OR_RET;
181    case nir_intrinsic_atomic_counter_read:
182       return DS_OP_READ_RET;
183    case nir_intrinsic_atomic_counter_xor:
184       return DS_OP_XOR_RET;
185    case nir_intrinsic_atomic_counter_post_dec:
186       return DS_OP_DEC_RET;
187    case nir_intrinsic_atomic_counter_comp_swap:
188       return DS_OP_CMP_XCHG_RET;
189    case nir_intrinsic_atomic_counter_pre_dec:
190    default:
191       return DS_OP_INVALID;
192    }
193 }
194 
get_opcode_wo(const nir_intrinsic_op opcode) const195 ESDOp EmitSSBOInstruction::get_opcode_wo(const nir_intrinsic_op opcode) const
196 {
197    switch (opcode) {
198    case nir_intrinsic_atomic_counter_add:
199       return DS_OP_ADD;
200    case nir_intrinsic_atomic_counter_and:
201       return DS_OP_AND;
202    case nir_intrinsic_atomic_counter_inc:
203       return DS_OP_INC;
204    case nir_intrinsic_atomic_counter_max:
205       return DS_OP_MAX_UINT;
206    case nir_intrinsic_atomic_counter_min:
207       return DS_OP_MIN_UINT;
208    case nir_intrinsic_atomic_counter_or:
209       return DS_OP_OR;
210    case nir_intrinsic_atomic_counter_xor:
211       return DS_OP_XOR;
212    case nir_intrinsic_atomic_counter_post_dec:
213       return DS_OP_DEC;
214    case nir_intrinsic_atomic_counter_comp_swap:
215       return DS_OP_CMP_XCHG_RET;
216    case nir_intrinsic_atomic_counter_exchange:
217       return DS_OP_XCHG_RET;
218    case nir_intrinsic_atomic_counter_pre_dec:
219    default:
220       return DS_OP_INVALID;
221    }
222 }
223 
224 RatInstruction::ERatOp
get_rat_opcode(const nir_intrinsic_op opcode,pipe_format format) const225 EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
226 {
227    switch (opcode) {
228    case nir_intrinsic_ssbo_atomic_add:
229    case nir_intrinsic_image_atomic_add:
230       return RatInstruction::ADD_RTN;
231    case nir_intrinsic_ssbo_atomic_and:
232    case nir_intrinsic_image_atomic_and:
233       return RatInstruction::AND_RTN;
234    case nir_intrinsic_ssbo_atomic_exchange:
235    case nir_intrinsic_image_atomic_exchange:
236       return RatInstruction::XCHG_RTN;
237    case nir_intrinsic_ssbo_atomic_or:
238    case nir_intrinsic_image_atomic_or:
239       return RatInstruction::OR_RTN;
240    case nir_intrinsic_ssbo_atomic_imin:
241    case nir_intrinsic_image_atomic_imin:
242       return RatInstruction::MIN_INT_RTN;
243    case nir_intrinsic_ssbo_atomic_imax:
244    case nir_intrinsic_image_atomic_imax:
245       return RatInstruction::MAX_INT_RTN;
246    case nir_intrinsic_ssbo_atomic_umin:
247    case nir_intrinsic_image_atomic_umin:
248       return RatInstruction::MIN_UINT_RTN;
249    case nir_intrinsic_ssbo_atomic_umax:
250    case nir_intrinsic_image_atomic_umax:
251       return RatInstruction::MAX_UINT_RTN;
252    case nir_intrinsic_ssbo_atomic_xor:
253    case nir_intrinsic_image_atomic_xor:
254       return RatInstruction::XOR_RTN;
255    case nir_intrinsic_ssbo_atomic_comp_swap:
256    case nir_intrinsic_image_atomic_comp_swap:
257       if (util_format_is_float(format))
258          return RatInstruction::CMPXCHG_FLT_RTN;
259       else
260          return RatInstruction::CMPXCHG_INT_RTN;
261    case nir_intrinsic_image_load:
262       return RatInstruction::NOP_RTN;
263    default:
264       unreachable("Unsupported RAT instruction");
265    }
266 }
267 
268 RatInstruction::ERatOp
get_rat_opcode_wo(const nir_intrinsic_op opcode,pipe_format format) const269 EmitSSBOInstruction::get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const
270 {
271 	switch (opcode) {
272    case nir_intrinsic_ssbo_atomic_add:
273    case nir_intrinsic_image_atomic_add:
274       return RatInstruction::ADD;
275    case nir_intrinsic_ssbo_atomic_and:
276    case nir_intrinsic_image_atomic_and:
277       return RatInstruction::AND;
278    case nir_intrinsic_ssbo_atomic_or:
279    case nir_intrinsic_image_atomic_or:
280       return RatInstruction::OR;
281    case nir_intrinsic_ssbo_atomic_imin:
282    case nir_intrinsic_image_atomic_imin:
283       return RatInstruction::MIN_INT;
284    case nir_intrinsic_ssbo_atomic_imax:
285    case nir_intrinsic_image_atomic_imax:
286       return RatInstruction::MAX_INT;
287    case nir_intrinsic_ssbo_atomic_umin:
288    case nir_intrinsic_image_atomic_umin:
289       return RatInstruction::MIN_UINT;
290    case nir_intrinsic_ssbo_atomic_umax:
291    case nir_intrinsic_image_atomic_umax:
292       return RatInstruction::MAX_UINT;
293    case nir_intrinsic_ssbo_atomic_xor:
294    case nir_intrinsic_image_atomic_xor:
295       return RatInstruction::XOR;
296    case nir_intrinsic_ssbo_atomic_comp_swap:
297    case nir_intrinsic_image_atomic_comp_swap:
298       if (util_format_is_float(format))
299          return RatInstruction::CMPXCHG_FLT;
300       else
301          return RatInstruction::CMPXCHG_INT;
302    default:
303       unreachable("Unsupported WO RAT instruction");
304    }
305 }
306 
load_atomic_inc_limits()307 bool EmitSSBOInstruction::load_atomic_inc_limits()
308 {
309    m_atomic_update = get_temp_register();
310    m_atomic_update->set_keep_alive();
311    emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1),
312    {alu_write, alu_last_instr}));
313    return true;
314 }
315 
emit_atomic_inc(const nir_intrinsic_instr * instr)316 bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
317 {
318    bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
319    PValue uav_id = from_nir(instr->src[0], 0);
320    GPRVector dest = read_result ? make_dest(instr): GPRVector(0, {7,7,7,7});
321    auto ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest,
322                           m_atomic_update, uav_id,
323                           remap_atomic_base(nir_intrinsic_base(instr)));
324    emit_instruction(ir);
325    return true;
326 }
327 
emit_atomic_pre_dec(const nir_intrinsic_instr * instr)328 bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
329 {
330    GPRVector dest = make_dest(instr);
331 
332    PValue uav_id = from_nir(instr->src[0], 0);
333 
334    auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
335                           remap_atomic_base(nir_intrinsic_base(instr)));
336    emit_instruction(ir);
337 
338    emit_instruction(new AluInstruction(op2_sub_int,  dest.x(), dest.x(), literal(1), last_write));
339 
340    return true;
341 }
342 
emit_load_ssbo(const nir_intrinsic_instr * instr)343 bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr)
344 {
345    GPRVector dest = make_dest(instr);
346 
347    /** src0 not used, should be some offset */
348    auto addr = from_nir(instr->src[1], 0);
349    PValue addr_temp = create_register_from_nir_src(instr->src[1], 1);
350 
351    /** Should be lowered in nir */
352    emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))},
353                     {alu_write, alu_last_instr}));
354 
355    const EVTXDataFormat formats[4] = {
356       fmt_32,
357       fmt_32_32,
358       fmt_32_32_32,
359       fmt_32_32_32_32
360    };
361 
362    const std::array<int,4> dest_swt[4] = {
363       {0,7,7,7},
364       {0,1,7,7},
365       {0,1,2,7},
366       {0,1,2,3}
367    };
368 
369    /* TODO fix resource index */
370    auto ir = new FetchInstruction(dest, addr_temp,
371                                   R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset
372                                   , from_nir(instr->src[0], 0),
373                                   formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int);
374    ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]);
375    ir->set_flag(vtx_use_tc);
376 
377    emit_instruction(ir);
378    return true;
379 }
380 
emit_store_ssbo(const nir_intrinsic_instr * instr)381 bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
382 {
383 
384    GPRVector::Swizzle swz = {7,7,7,7};
385    for (unsigned i = 0; i <  nir_src_num_components(instr->src[0]); ++i)
386       swz[i] = i;
387 
388    auto orig_addr = from_nir(instr->src[2], 0);
389 
390    GPRVector addr_vec = get_temp_vec4({0,1,2,7});
391 
392    auto temp2 = get_temp_vec4();
393 
394    auto rat_id = from_nir(instr->src[1], 0);
395 
396    emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr,
397                                        PValue(new LiteralValue(2)), write));
398    emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write));
399    emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write));
400 
401 
402    auto values = vec_from_nir_with_fetch_constant(instr->src[0],
403          (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true);
404 
405    auto cf_op = cf_mem_rat;
406    //auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
407    auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
408                                    values, addr_vec, m_ssbo_image_offset, rat_id, 1,
409                                    1, 0, false);
410    emit_instruction(store);
411    m_store_ops.push_back(store);
412 
413    for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) {
414       emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), get_chip_class() == CAYMAN  ?  last_write : write));
415       emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
416                                           {addr_vec.reg_i(0), Value::one_i}, last_write));
417       store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
418                                  temp2, addr_vec, m_ssbo_image_offset, rat_id, 1,
419                                  1, 0, false);
420       emit_instruction(store);
421       if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT))
422          m_store_ops.push_back(store);
423    }
424 
425    return true;
426 }
427 
428 bool
emit_image_store(const nir_intrinsic_instr * intrin)429 EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
430 {
431    int imageid = 0;
432    PValue image_offset;
433 
434    if (nir_src_is_const(intrin->src[0]))
435       imageid = nir_src_as_int(intrin->src[0]);
436    else
437       image_offset = from_nir(intrin->src[0], 0);
438 
439    auto coord =  vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3});
440    auto undef = from_nir(intrin->src[2], 0);
441    auto value = vec_from_nir_with_fetch_constant(intrin->src[3],  0xf, {0,1,2,3});
442    auto unknown  = from_nir(intrin->src[4], 0);
443 
444    if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
445        nir_intrinsic_image_array(intrin)) {
446       emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
447       emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
448    }
449 
450    auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
451    auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid,
452                                    image_offset, 1, 0xf, 0, false);
453 
454    //if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT))
455       m_store_ops.push_back(store);
456 
457    emit_instruction(store);
458    return true;
459 }
460 
461 bool
emit_ssbo_atomic_op(const nir_intrinsic_instr * intrin)462 EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
463 {
464    int imageid = 0;
465    PValue image_offset;
466 
467    if (nir_src_is_const(intrin->src[0]))
468       imageid = nir_src_as_int(intrin->src[0]);
469    else
470       image_offset = from_nir(intrin->src[0], 0);
471 
472    bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
473    auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) :
474                                get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
475 
476    auto coord_orig =  from_nir(intrin->src[1], 0, 0);
477    auto coord = get_temp_register(0);
478 
479    emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write));
480 
481    if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
482       emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
483                                           from_nir(intrin->src[3], 0), {alu_write}));
484       emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
485                                           from_nir(intrin->src[2], 0), {alu_last_instr, alu_write}));
486    } else {
487       emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
488                                           from_nir(intrin->src[2], 0), {alu_write}));
489       emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write));
490    }
491 
492 
493    GPRVector out_vec({coord, coord, coord, coord});
494 
495    auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset,
496                                    image_offset, 1, 0xf, 0, true);
497    emit_instruction(atomic);
498 
499    if (read_result) {
500       emit_instruction(new WaitAck(0));
501 
502       GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components);
503       auto fetch = new FetchInstruction(vc_fetch,
504                                         no_index_offset,
505                                         fmt_32,
506                                         vtx_nf_int,
507                                         vtx_es_none,
508                                         m_rat_return_address.reg_i(1),
509                                         dest,
510                                         0,
511                                         false,
512                                         0xf,
513                                         R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
514                                         0,
515                                         bim_none,
516                                         false,
517                                         false,
518                                         0,
519                                         0,
520                                         0,
521                                         image_offset,
522                                         {0,7,7,7});
523       fetch->set_flag(vtx_srf_mode);
524       fetch->set_flag(vtx_use_tc);
525       fetch->set_flag(vtx_vpm);
526       emit_instruction(fetch);
527    }
528 
529    return true;
530 
531 }
532 
533 bool
emit_image_load(const nir_intrinsic_instr * intrin)534 EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
535 {
536    int imageid = 0;
537    PValue image_offset;
538 
539    if (nir_src_is_const(intrin->src[0]))
540       imageid = nir_src_as_int(intrin->src[0]);
541    else
542       image_offset = from_nir(intrin->src[0], 0);
543 
544    bool read_retvalue = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
545    auto rat_op = read_retvalue ? get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin)):
546                                  get_rat_opcode_wo(intrin->intrinsic, nir_intrinsic_format(intrin));
547 
548    GPRVector::Swizzle swz = {0,1,2,3};
549    auto coord =  vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);
550 
551    if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
552        nir_intrinsic_image_array(intrin)) {
553       emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
554       emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
555    }
556 
557    if (intrin->intrinsic != nir_intrinsic_image_load) {
558       if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
559          emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
560                                              from_nir(intrin->src[4], 0), {alu_write}));
561          emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
562                                              from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
563       } else {
564          emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
565                                              from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
566       }
567    }
568    auto cf_op = cf_mem_rat;// nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
569 
570    auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid,
571                                    image_offset, 1, 0xf, 0, true);
572    emit_instruction(store);
573    return read_retvalue ? fetch_return_value(intrin) : true;
574 }
575 
fetch_return_value(const nir_intrinsic_instr * intrin)576 bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
577 {
578    emit_instruction(new WaitAck(0));
579 
580    pipe_format format = nir_intrinsic_format(intrin);
581    unsigned fmt = fmt_32;
582    unsigned num_format = 0;
583    unsigned format_comp = 0;
584    unsigned endian = 0;
585 
586    int imageid = 0;
587    PValue image_offset;
588 
589    if (nir_src_is_const(intrin->src[0]))
590       imageid = nir_src_as_int(intrin->src[0]);
591    else
592       image_offset = from_nir(intrin->src[0], 0);
593 
594    r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
595 
596    GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
597 
598    auto fetch = new FetchInstruction(vc_fetch,
599                                      no_index_offset,
600                                      (EVTXDataFormat)fmt,
601                                      (EVFetchNumFormat)num_format,
602                                      (EVFetchEndianSwap)endian,
603                                      m_rat_return_address.reg_i(1),
604                                      dest,
605                                      0,
606                                      false,
607                                      0x3,
608                                      R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
609                                      0,
610                                      bim_none,
611                                      false,
612                                      false,
613                                      0,
614                                      0,
615                                      0,
616                                      image_offset, {0,1,2,3});
617    fetch->set_flag(vtx_srf_mode);
618    fetch->set_flag(vtx_use_tc);
619    fetch->set_flag(vtx_vpm);
620    if (format_comp)
621       fetch->set_flag(vtx_format_comp_signed);
622 
623    emit_instruction(fetch);
624    return true;
625 }
626 
emit_image_size(const nir_intrinsic_instr * intrin)627 bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin)
628 {
629    GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
630    GPRVector src{0,{4,4,4,4}};
631 
632    assert(nir_src_as_uint(intrin->src[1]) == 0);
633 
634    auto const_offset = nir_src_as_const_value(intrin->src[0]);
635    auto dyn_offset = PValue();
636    int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
637    if (const_offset)
638       res_id += const_offset[0].u32;
639    else
640       dyn_offset = from_nir(intrin->src[0], 0);
641 
642    if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
643       emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)),
644                        res_id,
645                        bim_none));
646       return true;
647    } else {
648       emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src,
649                                              0/* ?? */,
650                                              res_id, dyn_offset));
651       if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
652           nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
653          /* Need to load the layers from a const buffer */
654 
655          set_has_txs_cube_array_comp();
656 
657          if (const_offset) {
658             unsigned lookup_resid = const_offset[0].u32;
659             emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2),
660                                                 PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
661                                                                         R600_BUFFER_INFO_CONST_BUFFER)),
662                                                 EmitInstruction::last_write));
663          } else {
664             /* If the adressing is indirect we have to get the z-value by using a binary search */
665             GPRVector trgt;
666             GPRVector help;
667 
668             auto addr = help.reg_i(0);
669             auto comp = help.reg_i(1);
670             auto low_bit = help.reg_i(2);
671             auto high_bit = help.reg_i(3);
672 
673             emit_instruction(new AluInstruction(op2_lshr_int, addr, from_nir(intrin->src[0], 0),
674                              literal(2), EmitInstruction::write));
675             emit_instruction(new AluInstruction(op2_and_int, comp, from_nir(intrin->src[0], 0),
676                              literal(3), EmitInstruction::last_write));
677 
678             emit_instruction(new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, R600_SHADER_BUFFER_INFO_SEL,
679                                                   R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none));
680 
681             emit_instruction(new AluInstruction(op3_cnde_int, comp, high_bit, trgt.reg_i(0), trgt.reg_i(2),
682                                                 EmitInstruction::write));
683             emit_instruction(new AluInstruction(op3_cnde_int, high_bit, high_bit, trgt.reg_i(1), trgt.reg_i(3),
684                                                 EmitInstruction::last_write));
685 
686             emit_instruction(new AluInstruction(op3_cnde_int, dest.reg_i(2), low_bit, comp, high_bit, EmitInstruction::last_write));
687          }
688       }
689    }
690    return true;
691 }
692 
emit_buffer_size(const nir_intrinsic_instr * intr)693 bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr)
694 {
695    std::array<PValue,4> dst_elms;
696 
697 
698    for (uint16_t i = 0; i < 4; ++i) {
699       dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? i : 7);
700    }
701 
702    GPRVector dst(dst_elms);
703    GPRVector src(0,{4,4,4,4});
704 
705    auto const_offset = nir_src_as_const_value(intr->src[0]);
706    auto dyn_offset = PValue();
707    int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
708    if (const_offset)
709       res_id += const_offset[0].u32;
710    else
711       assert(0 && "dynamic buffer offset not supported in buffer_size");
712 
713    emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
714                     res_id, bim_none));
715 
716    return true;
717 }
718 
make_stores_ack_and_waitack()719 bool EmitSSBOInstruction::make_stores_ack_and_waitack()
720 {
721    for (auto&& store: m_store_ops)
722       store->set_ack();
723 
724    if (!m_store_ops.empty())
725       emit_instruction(new WaitAck(0));
726 
727    m_store_ops.clear();
728 
729    return true;
730 }
731 
make_dest(const nir_intrinsic_instr * ir)732 GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
733 {
734    GPRVector::Values v;
735    int i;
736    for (i = 0; i < 4; ++i)
737       v[i] = from_nir(ir->dest, i);
738    return GPRVector(v);
739 }
740 
741 }
742