• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_instr_mem.h"
28 #include "sfn_instr_fetch.h"
29 #include "sfn_instr_tex.h"
30 #include "sfn_shader.h"
31 
32 namespace r600 {
33 
34 
/* Construct a GDS (global data share) instruction used for atomic counters.
 *
 * @param op       DS operation to perform (see lds_ops table)
 * @param dest     register that receives the operation result
 * @param src      source operand vector (layout depends on the chip class)
 * @param uav_base constant atomic-counter (UAV) base index
 * @param uav_id   optional register with an indirect UAV index (may be null)
 */
GDSInstr::GDSInstr(ESDOp op, Register *dest,
                   const RegisterVec4& src, int uav_base,
                   PRegister uav_id):
   m_op(op),
   m_dest(dest),
   m_src(src),
   m_uav_base(uav_base),
   m_uav_id(uav_id)
{
   /* GDS ops have side effects; never let dead-code elimination drop them */
   set_always_keep();

   /* Record value dependencies for liveness/scheduling */
   m_src.add_use(this);
   m_dest->add_parent(this);

   if (m_uav_id)
      m_uav_id->add_use(this);
}
52 
is_equal_to(const GDSInstr & rhs) const53 bool GDSInstr::is_equal_to(const GDSInstr& rhs) const
54 {
55 #define NE(X) (X != rhs. X)
56 
57    if (NE(m_op) ||
58        NE(m_src) ||
59        NE(m_uav_base))
60       return false;
61 
62    sfn_value_equal(m_dest, rhs.m_dest);
63 
64    return sfn_value_equal(m_uav_id, rhs.m_uav_id);
65 }
66 
/* Double-dispatch entry point for const visitors */
void GDSInstr::accept(ConstInstrVisitor& visitor) const
{
   visitor.visit(*this);
}
71 
/* Double-dispatch entry point for mutating visitors */
void GDSInstr::accept(InstrVisitor& visitor)
{
   visitor.visit(this);
}
76 
do_ready() const77 bool GDSInstr::do_ready() const
78 {
79    return m_src.ready(block_id(), index()) &&
80          (!m_uav_id || m_uav_id->ready(block_id(), index()));
81 }
82 
do_print(std::ostream & os) const83 void GDSInstr::do_print(std::ostream& os) const
84 {
85    os << "GDS " << lds_ops.at(m_op).name
86       << *m_dest;
87    os << " " << m_src;
88    os << " BASE:" << m_uav_base;
89 
90    if (m_uav_id)
91       os << " UAV:" << *m_uav_id;
92 }
93 
emit_atomic_counter(nir_intrinsic_instr * intr,Shader & shader)94 bool GDSInstr::emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader)
95 {
96    switch (intr->intrinsic) {
97    case nir_intrinsic_atomic_counter_add:
98    case nir_intrinsic_atomic_counter_and:
99    case nir_intrinsic_atomic_counter_exchange:
100    case nir_intrinsic_atomic_counter_max:
101    case nir_intrinsic_atomic_counter_min:
102    case nir_intrinsic_atomic_counter_or:
103    case nir_intrinsic_atomic_counter_xor:
104    case nir_intrinsic_atomic_counter_comp_swap:
105       return emit_atomic_op2(intr, shader);
106    case nir_intrinsic_atomic_counter_read:
107    case nir_intrinsic_atomic_counter_post_dec:
108       return emit_atomic_read(intr, shader);
109    case nir_intrinsic_atomic_counter_inc:
110       return emit_atomic_inc(intr, shader);
111    case nir_intrinsic_atomic_counter_pre_dec:
112       return emit_atomic_pre_dec(intr, shader);
113    default:
114       return false;
115    }
116 }
117 
get_opcode(const nir_intrinsic_op opcode)118 static ESDOp get_opcode(const nir_intrinsic_op opcode)
119 {
120    switch (opcode) {
121    case nir_intrinsic_atomic_counter_add:
122       return DS_OP_ADD_RET;
123    case nir_intrinsic_atomic_counter_and:
124       return DS_OP_AND_RET;
125    case nir_intrinsic_atomic_counter_exchange:
126       return DS_OP_XCHG_RET;
127    case nir_intrinsic_atomic_counter_inc:
128       return DS_OP_INC_RET;
129    case nir_intrinsic_atomic_counter_max:
130       return DS_OP_MAX_UINT_RET;
131    case nir_intrinsic_atomic_counter_min:
132       return DS_OP_MIN_UINT_RET;
133    case nir_intrinsic_atomic_counter_or:
134       return DS_OP_OR_RET;
135    case nir_intrinsic_atomic_counter_read:
136       return DS_OP_READ_RET;
137    case nir_intrinsic_atomic_counter_xor:
138       return DS_OP_XOR_RET;
139    case nir_intrinsic_atomic_counter_post_dec:
140       return DS_OP_DEC_RET;
141    case nir_intrinsic_atomic_counter_comp_swap:
142       return DS_OP_CMP_XCHG_RET;
143    case nir_intrinsic_atomic_counter_pre_dec:
144    default:
145       return DS_OP_INVALID;
146    }
147 }
148 
/* Map an atomic-counter intrinsic to the write-only GDS opcode, used when
 * the result of the atomic is never read. */
static ESDOp get_opcode_wo(const nir_intrinsic_op opcode)
{
   switch (opcode) {
   case nir_intrinsic_atomic_counter_add:
      return DS_OP_ADD;
   case nir_intrinsic_atomic_counter_and:
      return DS_OP_AND;
   case nir_intrinsic_atomic_counter_inc:
      return DS_OP_INC;
   case nir_intrinsic_atomic_counter_max:
      return DS_OP_MAX_UINT;
   case nir_intrinsic_atomic_counter_min:
      return DS_OP_MIN_UINT;
   case nir_intrinsic_atomic_counter_or:
      return DS_OP_OR;
   case nir_intrinsic_atomic_counter_xor:
      return DS_OP_XOR;
   case nir_intrinsic_atomic_counter_post_dec:
      return DS_OP_DEC;
   /* comp_swap and exchange fall back to the returning variants here —
    * presumably because no write-only encodings exist for them; verify
    * against the ISA docs before changing. */
   case nir_intrinsic_atomic_counter_comp_swap:
      return DS_OP_CMP_XCHG_RET;
   case nir_intrinsic_atomic_counter_exchange:
      return DS_OP_XCHG_RET;
   case nir_intrinsic_atomic_counter_pre_dec:
   default:
      return DS_OP_INVALID;
   }
}
177 
178 
/* Emit a two-operand GDS atomic (add, and, or, xor, min, max, xchg,
 * cmp_swap) for an atomic-counter intrinsic.
 * Returns false when no hardware opcode exists for the request. */
bool GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   /* If nobody consumes the result we can pick the write-only opcode
    * variant (when one exists). */
   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);

   ESDOp op = read_result ? get_opcode(instr->intrinsic) :
                            get_opcode_wo(instr->intrinsic);

   if (DS_OP_INVALID == op)
      return false;

   /* NOTE(review): the trailing '{}' after the structured binding looks
    * like a tooling workaround — confirm before removing. */
   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
   offset += nir_intrinsic_base(instr);

   auto dest = vf.dest(instr->dest, 0, pin_free);

   /* The GDS data source must live in a register; copy literals or other
    * non-register values into a temporary first. */
   PRegister src_as_register = nullptr;
   auto src_val = vf.src(instr->src[1], 0);
   if (!src_val->as_register()) {
      auto temp_src_val = vf.temp_register();
      shader.emit_instruction(new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write));
      src_as_register = temp_src_val;
   } else
      src_as_register = src_val->as_register();

   if (uav_id != nullptr)
      shader.set_flag(Shader::sh_indirect_atomic);

   GDSInstr *ir = nullptr;
   if (shader.chip_class() < ISA_CC_CAYMAN) {
      /* Pre-Cayman: the data value sits in the y channel; the UAV base
       * and optional indirect index are carried by the instruction. */
      RegisterVec4 src(nullptr, src_as_register, nullptr, nullptr, pin_free);
      ir = new GDSInstr(op, dest, src, offset, uav_id);

   } else {
      /* Cayman: the byte address 4 * (uav_id + offset) is computed into
       * the x channel and the data value is moved into y.
       * NOTE(review): this 'dest' shadows the identical declaration above
       * — harmless duplication. */
      auto dest = vf.dest(instr->dest, 0, pin_free);
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
                                 AluInstr::write));
      else
         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write));
      ir = new GDSInstr(op, dest, tmp, 0, nullptr);
   }
   shader.emit_instruction(ir);
   return true;
}
226 
/* Emit a GDS read of an atomic counter (also used for post-decrement,
 * whose opcode is selected by the caller's dispatch). */
bool GDSInstr::emit_atomic_read(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   /* NOTE(review): trailing '{}' after the structured binding looks like a
    * tooling workaround — confirm before removing. */
   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   auto dest = vf.dest(instr->dest, 0, pin_free);

   GDSInstr *ir = nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      /* A read needs no data operand: all channels unused (swizzle 7). */
      RegisterVec4 src = RegisterVec4(0, true, {7,7,7,7});
      ir = new GDSInstr(DS_OP_READ_RET, dest, src, offset, uav_id);
   } else {
      /* Cayman: compute the byte address 4 * (uav_id + offset) into x. */
      auto tmp = vf.temp_vec4(pin_group, {0, 7, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
                                 AluInstr::write));
      else
         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      ir = new GDSInstr(DS_OP_READ_RET, dest, tmp, 0, nullptr);
   }

   shader.emit_instruction(ir);
   return true;
}
255 
/* Emit an atomic-counter increment as a GDS ADD using the shader's
 * preloaded increment value (shader.atomic_update()). Uses the
 * write-only ADD when the result is never read. */
bool GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   /* Write-only variant is sufficient if the SSA def has no uses. */
   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);

   /* NOTE(review): trailing '{}' after the structured binding looks like a
    * tooling workaround — confirm before removing. */
   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   GDSInstr *ir = nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      auto dest = vf.dest(instr->dest, 0, pin_free);
      /* Increment value goes in the y channel. */
      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
      ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD,
                             dest, src, offset, uav_id);
   } else {
      /* Cayman: byte address 4 * (uav_id + offset) in x, value in y. */
      auto dest = vf.dest(instr->dest, 0, pin_free);
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});

      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
                                 AluInstr::write));
      else
         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
      ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD,
                        dest, tmp, 0, nullptr);
   }
   shader.emit_instruction(ir);
   return true;
}
288 
/* Emit an atomic-counter pre-decrement. The hardware only returns the
 * value *before* the operation, so we do SUB_RET into a temporary and
 * then subtract 1 from the returned value to get pre-decrement
 * semantics. */
bool GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   /* NOTE(review): trailing '{}' after the structured binding looks like a
    * tooling workaround — confirm before removing. */
   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   /* Holds the pre-op value returned by the hardware. */
   auto *tmp_dest = vf.temp_register();

   GDSInstr *ir = nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      /* Decrement amount (shader.atomic_update()) goes in y. */
      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
      ir = new GDSInstr(DS_OP_SUB_RET, tmp_dest, src, offset, uav_id);
   } else {
      /* Cayman: byte address 4 * (uav_id + offset) in x, value in y. */
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
                                 AluInstr::write));
      else
         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
      ir = new GDSInstr(DS_OP_SUB_RET, tmp_dest, tmp, 0, nullptr);
   }

   shader.emit_instruction(ir);
   /* Correct the returned pre-op value: result = old - 1. */
   shader.emit_instruction(new AluInstr(op2_sub_int,  vf.dest(instr->dest, 0, pin_free),
                                        tmp_dest, vf.one_i(), AluInstr::last_write));
   return true;
}
320 
321 
/* Construct a RAT (random access target) memory instruction.
 *
 * @param cf_opcode     CF-level opcode (e.g. cf_mem_rat)
 * @param rat_op        RAT operation (store, atomic, ...)
 * @param data          data operand vector
 * @param index         address/index operand vector
 * @param rat_id        constant RAT resource id
 * @param rat_id_offset optional register with an indirect resource offset
 * @param burst_count   number of elements written in a burst
 * @param comp_mask     component write mask
 * @param element_size  element size selector
 */
RatInstr::RatInstr(ECFOpCode cf_opcode, ERatOp rat_op,
                   const RegisterVec4& data, const RegisterVec4& index,
                   int rat_id, PRegister rat_id_offset,
                   int burst_count, int comp_mask, int element_size):
   m_cf_opcode(cf_opcode),
   m_rat_op(rat_op),
   m_data(data),
   m_index(index),
   m_rat_id_offset(rat_id_offset),
   m_rat_id(rat_id),
   m_burst_count(burst_count),
   m_comp_mask(comp_mask),
   m_element_size(element_size)
{
   /* Memory side effects: never dead-code eliminate. */
   set_always_keep();

   /* Record value dependencies for liveness/scheduling. */
   m_data.add_use(this);
   m_index.add_use(this);
   if (m_rat_id_offset)
      m_rat_id_offset->add_use(this);
}
343 
344 
/* Double-dispatch entry point for const visitors */
void RatInstr::accept(ConstInstrVisitor& visitor) const
{
   visitor.visit(*this);
}
349 
/* Double-dispatch entry point for mutating visitors */
void RatInstr::accept(InstrVisitor& visitor)
{
   visitor.visit(this);
}
354 
/* Equality comparison is not implemented for RAT instructions; it is
 * not expected to be called (asserts in debug builds). */
bool RatInstr::is_equal_to(const RatInstr& lhs) const
{
   (void)lhs;
   assert(0);
   return false;
}
361 
do_ready() const362 bool RatInstr::do_ready() const
363 {
364    if (m_rat_op != STORE_TYPED) {
365       for (auto i: required_instr()) {
366          if (!i->is_scheduled()) {
367             return false;
368          }
369       }
370    }
371 
372    return m_data.ready(block_id(), index()) &&
373          m_index.ready(block_id(), index());
374 }
375 
do_print(std::ostream & os) const376 void RatInstr::do_print(std::ostream& os) const
377 {
378    os << "MEM_RAT RAT " << m_rat_id;
379    if (m_rat_id_offset)
380       os << "+" << *m_rat_id_offset;
381    os << " @" << m_index;
382    os << " OP:" << m_rat_op << " " << m_data;
383    os << " BC:" << m_burst_count
384       << " MASK:" << m_comp_mask
385       << " ES:" << m_element_size;
386    if (m_need_ack)
387       os << " ACK";
388 }
389 
/* Map an SSBO/image intrinsic to the RAT opcode variant that returns the
 * previous value (the _RTN forms). Image loads use NOP_RTN to just read
 * back data. Aborts on unsupported intrinsics. */
static RatInstr::ERatOp
get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format)
{
	switch (opcode) {
   case nir_intrinsic_image_load:
      return RatInstr::NOP_RTN;
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_image_atomic_add:
      return RatInstr::ADD_RTN;
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_image_atomic_and:
      return RatInstr::AND_RTN;
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_image_atomic_or:
      return RatInstr::OR_RTN;
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
      return RatInstr::MIN_INT_RTN;
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
      return RatInstr::MAX_INT_RTN;
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
      return RatInstr::MIN_UINT_RTN;
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
      return RatInstr::MAX_UINT_RTN;
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
      return RatInstr::XOR_RTN;
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
      /* compare-and-swap needs the float variant on float formats */
      if (util_format_is_float(format))
         return RatInstr::CMPXCHG_FLT_RTN;
      else
         return RatInstr::CMPXCHG_INT_RTN;
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
      return RatInstr::XCHG_RTN;
   default:
      unreachable("Unsupported WO RAT instruction");
   }
}
433 
/* Map an SSBO/image intrinsic to the write-only RAT opcode, used when the
 * atomic result is never read. Aborts on unsupported intrinsics. */
static RatInstr::ERatOp
get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format)
{
	switch (opcode) {
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_image_atomic_add:
      return RatInstr::ADD;
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_image_atomic_and:
      return RatInstr::AND;
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_image_atomic_or:
      return RatInstr::OR;
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
      return RatInstr::MIN_INT;
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
      return RatInstr::MAX_INT;
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
      return RatInstr::MIN_UINT;
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
      return RatInstr::MAX_UINT;
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
      return RatInstr::XOR;
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
      /* compare-and-swap needs the float variant on float formats */
      if (util_format_is_float(format))
         return RatInstr::CMPXCHG_FLT;
      else
         return RatInstr::CMPXCHG_INT;
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
      /* NOTE(review): exchange falls back to the returning variant here —
       * presumably because no write-only XCHG encoding exists; verify
       * against the ISA docs before changing. */
      return RatInstr::XCHG_RTN;
   default:
      unreachable("Unsupported WO RAT instruction");
   }
}
475 
emit(nir_intrinsic_instr * intr,Shader & shader)476 bool RatInstr::emit(nir_intrinsic_instr *intr, Shader& shader)
477 {
478    switch (intr->intrinsic) {
479    case nir_intrinsic_load_ssbo:
480       return emit_ssbo_load(intr, shader);
481    case nir_intrinsic_store_ssbo:
482       return emit_ssbo_store(intr, shader);
483    case nir_intrinsic_ssbo_atomic_add:
484    case nir_intrinsic_ssbo_atomic_comp_swap:
485    case nir_intrinsic_ssbo_atomic_or:
486    case nir_intrinsic_ssbo_atomic_xor:
487    case nir_intrinsic_ssbo_atomic_imax:
488    case nir_intrinsic_ssbo_atomic_imin:
489    case nir_intrinsic_ssbo_atomic_umax:
490    case nir_intrinsic_ssbo_atomic_umin:
491    case nir_intrinsic_ssbo_atomic_and:
492    case nir_intrinsic_ssbo_atomic_exchange:
493       return emit_ssbo_atomic_op(intr, shader);
494    case nir_intrinsic_image_store:
495       return emit_image_store(intr, shader);
496    case nir_intrinsic_image_load:
497    case nir_intrinsic_image_atomic_add:
498    case nir_intrinsic_image_atomic_and:
499    case nir_intrinsic_image_atomic_or:
500    case nir_intrinsic_image_atomic_xor:
501    case nir_intrinsic_image_atomic_exchange:
502    case nir_intrinsic_image_atomic_comp_swap:
503    case nir_intrinsic_image_atomic_umin:
504    case nir_intrinsic_image_atomic_umax:
505    case nir_intrinsic_image_atomic_imin:
506    case nir_intrinsic_image_atomic_imax:
507       return emit_image_load_or_atomic(intr, shader);
508    case nir_intrinsic_image_size:
509       return emit_image_size(intr, shader);
510    case nir_intrinsic_get_ssbo_size:
511       return emit_ssbo_size(intr, shader);
512    default:
513       return false;
514    }
515 }
516 
/* Emit an SSBO load as a buffer fetch. The byte offset is converted to a
 * dword index, and the fetch format/swizzle is chosen from the number of
 * components the NIR destination needs. */
bool RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
{
   auto &vf = shader.value_factory();
   auto dest = vf.dest_vec4(intr->dest, pin_group);

   /** src0 not used, should be some offset */
   auto addr = vf.src(intr->src[1], 0);
   auto addr_temp = vf.temp_register();

   /** Should be lowered in nir */
   shader.emit_instruction(new AluInstr(op2_lshr_int, addr_temp, addr, vf.literal(2),
                                        {alu_write, alu_last_instr}));

   /* Fetch format indexed by (component count - 1). */
   const EVTXDataFormat formats[4] = {
      fmt_32,
      fmt_32_32,
      fmt_32_32_32,
      fmt_32_32_32_32
   };

   /* Destination swizzles: unused channels select 7 (masked). */
   RegisterVec4::Swizzle dest_swz[4] = {
      {0,7,7,7},
      {0,1,7,7},
      {0,1,2,7},
      {0,1,2,3}
   };

   int comp_idx = nir_dest_num_components(intr->dest) - 1;

   /* NOTE(review): trailing '{}' after the structured binding looks like a
    * tooling workaround — confirm before removing. */
   auto [offset, res_offset] = shader.evaluate_resource_offset(intr, 0); {}

   /* SSBOs live behind the image resources in the resource space. */
   auto res_id =  R600_IMAGE_REAL_RESOURCE_OFFSET + offset +
                  shader.ssbo_image_offset();

   auto ir = new LoadFromBuffer(dest, dest_swz[comp_idx], addr_temp,  0,
                                res_id, res_offset, formats[comp_idx]);
   ir->set_fetch_flag(FetchInstr::use_tc);
   ir->set_num_format(vtx_nf_int);

   shader.emit_instruction(ir);
   return true;
}
559 
/* Emit an SSBO store. Each component is written with its own typed RAT
 * store: the dword address is computed once and incremented per
 * component. */
bool RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
{
   auto &vf = shader.value_factory();
   auto orig_addr = vf.src(instr->src[2], 0);

   auto addr_base = vf.temp_register();

   auto [offset, rat_id] = shader.evaluate_resource_offset(instr, 1);

   /* Convert the byte offset to a dword index. */
   shader.emit_instruction(new AluInstr(op2_lshr_int, addr_base, orig_addr,
                           vf.literal(2), AluInstr::write));

   for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) {
      /* Per-component dword address in x (y/z unwritten, w masked). */
      auto addr_vec = vf.temp_vec4(pin_group, {0,1,2,7});
      if (i == 0) {
         shader.emit_instruction(new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::last_write));
      } else {
         shader.emit_instruction(new AluInstr(op2_add_int, addr_vec[0], addr_base,
                                 vf.literal(i),
                                 AluInstr::last_write));
      }
      /* Copy the value into a register pinned to channel 0 so it lands
       * in the store's x slot. */
      auto value = vf.src(instr->src[0], i);
      PRegister v = vf.temp_register(0);
      shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::last_write));
      auto value_vec = RegisterVec4(v, nullptr, nullptr, nullptr, pin_chan);
      auto store = new RatInstr(cf_mem_rat, RatInstr::STORE_TYPED,
                                value_vec, addr_vec, offset + shader.ssbo_image_offset(),
                                rat_id, 1, 1, 0);
      shader.emit_instruction(store);
   }

   return true;
}
593 
/* Emit an SSBO atomic as a RAT op. Data layout in data_vec4:
 *   x = data operand, y = RAT return address,
 *   z (Cayman) or w (EG) = compare value for comp_swap.
 * When the result is read, a follow-up fetch from the return address is
 * chained behind the atomic. */
bool RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   /* NOTE(review): trailing '{}' after the structured binding looks like a
    * tooling workaround — confirm before removing. */
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intr, 0); {}

   /* Pick the write-only opcode when the SSA def has no uses. */
   bool read_result = !intr->dest.is_ssa || !list_is_empty(&intr->dest.ssa.uses);
   auto opcode = read_result ? get_rat_opcode(intr->intrinsic, PIPE_FORMAT_R32_UINT) :
                               get_rat_opcode_wo(intr->intrinsic, PIPE_FORMAT_R32_UINT);

   /* Byte offset -> dword index, pinned to channel 0. */
   auto coord_orig =  vf.src(intr->src[1], 0);
   auto coord = vf.temp_register(0);

   auto data_vec4 = vf.temp_vec4(pin_group, {0,1,2,3});

   shader.emit_instruction(new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write));

   /* y channel carries the RAT return address for the read-back. */
   shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));


   if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
      /* src[3] = swap value (x), src[2] = compare value (z on Cayman,
       * w on Evergreen). */
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[3], 0), AluInstr::write));
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
                                           vf.src(intr->src[2], 0), {alu_last_instr, alu_write}));
   } else {
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write));
   }


   /* The index vector replicates the dword address in all channels. */
   RegisterVec4 out_vec(coord, coord, coord, coord, pin_group);

   auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, out_vec, imageid + shader.ssbo_image_offset(),
                              image_offset, 1, 0xf, 0);
   shader.emit_instruction(atomic);

   atomic->set_ack();
   if (read_result) {
      /* Fetch the old value back from the RAT return buffer; the fetch
       * must not be scheduled before the atomic itself. */
      atomic->set_instr_flag(ack_rat_return_write);
      auto dest = vf.dest_vec4(intr->dest, pin_group);

      auto fetch = new FetchInstr(vc_fetch,
                                  dest, {0, 1, 2, 3},
                                  shader.rat_return_address(),
                                  0,
                                  no_index_offset,
                                  fmt_32,
                                  vtx_nf_int,
                                  vtx_es_none,
                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                                  image_offset);
      fetch->set_mfc(15);
      fetch->set_fetch_flag(FetchInstr::srf_mode);
      fetch->set_fetch_flag(FetchInstr::use_tc);
      fetch->set_fetch_flag(FetchInstr::vpm);
      fetch->set_fetch_flag(FetchInstr::wait_ack);
      fetch->add_required_instr(atomic);
      shader.chain_ssbo_read(fetch);
      shader.emit_instruction(fetch);
   }

   return true;

}
656 
/* Emit a get_ssbo_size query. Only constant buffer indices are
 * supported; a dynamic index asserts. */
bool RatInstr::emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto dest = vf.dest_vec4(intr->dest, pin_group);

   auto const_offset = nir_src_as_const_value(intr->src[0]);
   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      assert(0 && "dynamic buffer offset not supported in buffer_size");

   shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,1,2,3},res_id));
   return true;
}
672 
emit_image_store(nir_intrinsic_instr * intrin,Shader & shader)673 bool RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader)
674 {
675    auto& vf = shader.value_factory();
676    auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0); {}
677 
678 
679    auto coord_load = vf.src_vec4(intrin->src[1], pin_chan);
680    auto coord =  vf.temp_vec4(pin_group);
681 
682    auto value_load = vf.src_vec4(intrin->src[3], pin_chan);
683    auto value =  vf.temp_vec4(pin_group);
684 
685    RegisterVec4::Swizzle swizzle = {0,1,2,3};
686    if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
687        nir_intrinsic_image_array(intrin))
688       swizzle  = {0,2,1,3};
689 
690    for (int i = 0; i < 4; ++i) {
691       auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
692       shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags));
693    }
694    for (int i = 0; i < 4; ++i) {
695       auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
696       shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags));
697    }
698 
699    auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
700    auto store = new RatInstr(op, RatInstr::STORE_TYPED, value, coord, imageid,
701                              image_offset, 1, 0xf, 0);
702 
703    if (nir_intrinsic_has_access(intrin) & ACCESS_COHERENT)
704       store->set_ack();
705    shader.emit_instruction(store);
706    return true;
707 }
708 
/* Emit an image load or image atomic as a RAT op. Data layout mirrors
 * emit_ssbo_atomic_op: x = data, y = RAT return address, z/w = compare
 * value for comp_swap. When the result is read, a vertex fetch from the
 * return address is chained behind the atomic, using the image's real
 * pixel format. */
bool RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();
   /* NOTE(review): trailing '{}' after the structured binding looks like a
    * tooling workaround — confirm before removing. */
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0); {}

   /* Pick the write-only opcode when the SSA def has no uses. */
   bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
   auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) :
                               get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);

   auto coord_orig =  vf.src_vec4(intrin->src[1], pin_chan);
   auto coord = vf.temp_vec4(pin_group);

   auto data_vec4 = vf.temp_vec4(pin_group, {0,1,2,3});

   /* For 1D array images the layer moves from y into the z slot. */
   RegisterVec4::Swizzle swizzle = {0,1,2,3};
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin))
      swizzle  = {0,2,1,3};

   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags));
   }

   /* y channel carries the RAT return address for the read-back. */
   shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));

   if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
      /* src[4] = swap value (x), src[3] = compare value (z on Cayman,
       * w on Evergreen). */
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[4], 0), AluInstr::write));
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
                                          vf.src(intrin->src[3], 0), AluInstr::last_write));
   } else {
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0],
                                          vf.src(intrin->src[3], 0), AluInstr::write));
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write));
   }

   auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, coord, imageid,
                              image_offset, 1, 0xf, 0);
   shader.emit_instruction(atomic);

   atomic->set_ack();
   if (read_result) {
      atomic->set_instr_flag(ack_rat_return_write);
      auto dest = vf.dest_vec4(intrin->dest, pin_group);

      /* Translate the image's pipe format into vertex-fetch parameters
       * so the read-back decodes the stored pixels correctly. */
      pipe_format format = nir_intrinsic_format(intrin);
      unsigned fmt = fmt_32;
      unsigned num_format = 0;
      unsigned format_comp = 0;
      unsigned endian = 0;
      r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);

      auto fetch = new FetchInstr(vc_fetch,
                                  dest, {0, 1, 2, 3},
                                  shader.rat_return_address(),
                                  0,
                                  no_index_offset,
                                  (EVTXDataFormat)fmt,
                                  (EVFetchNumFormat)num_format,
                                  (EVFetchEndianSwap)endian,
                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                                  image_offset);
      fetch->set_mfc(3);
      fetch->set_fetch_flag(FetchInstr::srf_mode);
      fetch->set_fetch_flag(FetchInstr::use_tc);
      fetch->set_fetch_flag(FetchInstr::vpm);
      fetch->set_fetch_flag(FetchInstr::wait_ack);
      if (format_comp)
         fetch->set_fetch_flag(FetchInstr::format_comp_signed);

      shader.chain_ssbo_read(fetch);
      shader.emit_instruction(fetch);
   }

   return true;
}
785 
786 #define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
787 
/* Emit an image_size query. Buffer images use a buffer-size query; other
 * image types use a resinfo texture instruction. Cube arrays need the
 * layer count patched in from a constant buffer (directly for constant
 * resource indices, via a small binary search for indirect ones). */
bool RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();

   /* resinfo source: LOD in all channels (swizzle 4 selects it). */
   auto src = RegisterVec4(0, true, {4,4,4,4});

   /* Only LOD 0 is supported here. */
   assert(nir_src_as_uint(intrin->src[1]) == 0);

   auto const_offset = nir_src_as_const_value(intrin->src[0]);
   PRegister dyn_offset = nullptr;

   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));

   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
      auto dest = vf.dest_vec4(intrin->dest, pin_group);
      shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,1,2,3}, res_id));
      return true;
   } else {

      if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
          nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
         /* Need to load the layers from a const buffer */

         /* resinfo cannot report cube-array layers, so channel 2 (swizzle
          * 7 = masked) is filled in separately below. */
         auto dest = vf.dest_vec4(intrin->dest, pin_group);
         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0,1,7,3},
                                              src, 0/* ?? */, res_id, dyn_offset));

         shader.set_flag(Shader::sh_txs_cube_array_comp);

         if (const_offset) {
            /* Constant index: the layer count sits at a known slot in the
             * buffer-info constant buffer. */
            unsigned lookup_resid = const_offset[0].u32;
            shader.emit_instruction(new AluInstr(op1_mov, dest[2],
                                    vf.uniform(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL,  lookup_resid % 4,
                                               R600_BUFFER_INFO_CONST_BUFFER),
                                    AluInstr::last_write));
         } else {
            /* If the adressing is indirect we have to get the z-value by using a binary search */
            auto addr = vf.temp_register();
            auto comp1 = vf.temp_register();
            auto comp2 = vf.temp_register();
            auto low_bit = vf.temp_register();
            auto high_bit = vf.temp_register();

            auto trgt = vf.temp_vec4(pin_group);

            /* Split the index: bits[2..] select the const-buffer dword,
             * bits 0-1 select the component within the loaded vec4. */
            shader.emit_instruction(new AluInstr(op2_lshr_int, addr, vf.src(intrin->src[0], 0),
                             vf.literal(2), AluInstr::write));
            shader.emit_instruction(new AluInstr(op2_and_int, low_bit, vf.src(intrin->src[0], 0),
                             vf.one_i(), AluInstr::write));
            shader.emit_instruction(new AluInstr(op2_and_int, high_bit, vf.src(intrin->src[0], 0),
                             vf.literal(2), AluInstr::last_write));

            shader.emit_instruction(new LoadFromBuffer(trgt, {0, 1, 2, 3}, addr, R600_SHADER_BUFFER_INFO_SEL,
                                                  R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float));

            // this may be wrong
            shader.emit_instruction(new AluInstr(op3_cnde_int, comp1, high_bit, trgt[0], trgt[2],
                                                AluInstr::write));
            shader.emit_instruction(new AluInstr(op3_cnde_int, comp2, high_bit, trgt[1], trgt[3],
                                                 AluInstr::last_write));
            shader.emit_instruction(new AluInstr(op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write));
         }
      } else {
         /* Common case: resinfo reports all needed components. */
         auto dest = vf.dest_vec4(intrin->dest, pin_group);
         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0,1,2,3},
                                              src, 0/* ?? */, res_id, dyn_offset));

      }
   }
   return true;
}
863 
864 }
865