• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_instr_mem.h"
28 
29 #include "nir_intrinsics.h"
30 #include "nir_intrinsics_indices.h"
31 #include "sfn_alu_defines.h"
32 #include "sfn_instr_alu.h"
33 #include "sfn_instr_fetch.h"
34 #include "sfn_instr_tex.h"
35 #include "sfn_shader.h"
36 #include "sfn_virtualvalues.h"
37 
38 namespace r600 {
39 
GDSInstr(ESDOp op,Register * dest,const RegisterVec4 & src,int uav_base,PRegister uav_id)40 GDSInstr::GDSInstr(
41    ESDOp op, Register *dest, const RegisterVec4& src, int uav_base, PRegister uav_id):
42     Resource(this, uav_base, uav_id),
43     m_op(op),
44     m_dest(dest),
45     m_src(src)
46 {
47    set_always_keep();
48 
49    m_src.add_use(this);
50    if (m_dest)
51       m_dest->add_parent(this);
52 }
53 
54 bool
is_equal_to(const GDSInstr & rhs) const55 GDSInstr::is_equal_to(const GDSInstr& rhs) const
56 {
57 #define NE(X) (X != rhs.X)
58 
59    if (NE(m_op) || NE(m_src))
60       return false;
61 
62    sfn_value_equal(m_dest, rhs.m_dest);
63 
64    return resource_is_equal(rhs);
65 }
66 
void
GDSInstr::accept(ConstInstrVisitor& visitor) const
{
   /* Double-dispatch entry point for read-only instruction visitors. */
   visitor.visit(*this);
}
72 
void
GDSInstr::accept(InstrVisitor& visitor)
{
   /* Double-dispatch entry point for mutating instruction visitors. */
   visitor.visit(this);
}
78 
79 bool
do_ready() const80 GDSInstr::do_ready() const
81 {
82    return m_src.ready(block_id(), index()) && resource_ready(block_id(), index());
83 }
84 
void
GDSInstr::do_print(std::ostream& os) const
{
   /* Debug dump: opcode name, destination (or ___ if none), sources,
    * resource base id and the optional indirect resource offset. */
   os << "GDS " << lds_ops.at(m_op).name;
   if (m_dest)
      os << *m_dest;
   else
      os << "___";
   os << " " << m_src;
   os << " BASE:" << resource_id();

   print_resource_offset(os);
}
98 
99 bool
emit_atomic_counter(nir_intrinsic_instr * intr,Shader & shader)100 GDSInstr::emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader)
101 {
102    switch (intr->intrinsic) {
103    case nir_intrinsic_atomic_counter_add:
104    case nir_intrinsic_atomic_counter_and:
105    case nir_intrinsic_atomic_counter_exchange:
106    case nir_intrinsic_atomic_counter_max:
107    case nir_intrinsic_atomic_counter_min:
108    case nir_intrinsic_atomic_counter_or:
109    case nir_intrinsic_atomic_counter_xor:
110    case nir_intrinsic_atomic_counter_comp_swap:
111       return emit_atomic_op2(intr, shader);
112    case nir_intrinsic_atomic_counter_read:
113    case nir_intrinsic_atomic_counter_post_dec:
114       return emit_atomic_read(intr, shader);
115    case nir_intrinsic_atomic_counter_inc:
116       return emit_atomic_inc(intr, shader);
117    case nir_intrinsic_atomic_counter_pre_dec:
118       return emit_atomic_pre_dec(intr, shader);
119    default:
120       return false;
121    }
122 }
123 
/* Channels that are still free in the source vector and may be
 * allocated by the register allocator. */
uint8_t GDSInstr::allowed_src_chan_mask() const
{
   return m_src.free_chan_mask();
}
128 
129 static ESDOp
get_opcode(const nir_intrinsic_op opcode)130 get_opcode(const nir_intrinsic_op opcode)
131 {
132    switch (opcode) {
133    case nir_intrinsic_atomic_counter_add:
134       return DS_OP_ADD_RET;
135    case nir_intrinsic_atomic_counter_and:
136       return DS_OP_AND_RET;
137    case nir_intrinsic_atomic_counter_exchange:
138       return DS_OP_XCHG_RET;
139    case nir_intrinsic_atomic_counter_inc:
140       return DS_OP_INC_RET;
141    case nir_intrinsic_atomic_counter_max:
142       return DS_OP_MAX_UINT_RET;
143    case nir_intrinsic_atomic_counter_min:
144       return DS_OP_MIN_UINT_RET;
145    case nir_intrinsic_atomic_counter_or:
146       return DS_OP_OR_RET;
147    case nir_intrinsic_atomic_counter_read:
148       return DS_OP_READ_RET;
149    case nir_intrinsic_atomic_counter_xor:
150       return DS_OP_XOR_RET;
151    case nir_intrinsic_atomic_counter_post_dec:
152       return DS_OP_DEC_RET;
153    case nir_intrinsic_atomic_counter_comp_swap:
154       return DS_OP_CMP_XCHG_RET;
155    case nir_intrinsic_atomic_counter_pre_dec:
156    default:
157       return DS_OP_INVALID;
158    }
159 }
160 
161 static ESDOp
get_opcode_wo(const nir_intrinsic_op opcode)162 get_opcode_wo(const nir_intrinsic_op opcode)
163 {
164    switch (opcode) {
165    case nir_intrinsic_atomic_counter_add:
166       return DS_OP_ADD;
167    case nir_intrinsic_atomic_counter_and:
168       return DS_OP_AND;
169    case nir_intrinsic_atomic_counter_inc:
170       return DS_OP_INC;
171    case nir_intrinsic_atomic_counter_max:
172       return DS_OP_MAX_UINT;
173    case nir_intrinsic_atomic_counter_min:
174       return DS_OP_MIN_UINT;
175    case nir_intrinsic_atomic_counter_or:
176       return DS_OP_OR;
177    case nir_intrinsic_atomic_counter_xor:
178       return DS_OP_XOR;
179    case nir_intrinsic_atomic_counter_post_dec:
180       return DS_OP_DEC;
181    case nir_intrinsic_atomic_counter_comp_swap:
182       return DS_OP_CMP_XCHG_RET;
183    case nir_intrinsic_atomic_counter_exchange:
184       return DS_OP_XCHG_RET;
185    case nir_intrinsic_atomic_counter_pre_dec:
186    default:
187       return DS_OP_INVALID;
188    }
189 }
190 
/* Emit a two-operand GDS atomic (add/and/or/xor/min/max/xchg/cmpxchg).
 * Returns false if no valid opcode exists for the intrinsic. */
bool
GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   /* If the SSA def has no uses we can pick the cheaper write-only op. */
   bool read_result = !list_is_empty(&instr->def.uses);

   ESDOp op =
      read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic);

   if (DS_OP_INVALID == op)
      return false;

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   {
   }
   offset += nir_intrinsic_base(instr);

   auto dest = read_result ? vf.dest(instr->def, 0, pin_free) : nullptr;

   /* The GDS source must be a register; copy non-register values
    * (e.g. literals) into a temp first. */
   PRegister src_as_register = nullptr;
   auto src_val = vf.src(instr->src[1], 0);
   if (!src_val->as_register()) {
      auto temp_src_val = vf.temp_register();
      shader.emit_instruction(
         new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write));
      src_as_register = temp_src_val;
   } else
      src_as_register = src_val->as_register();

   if (uav_id != nullptr)
      shader.set_flag(Shader::sh_indirect_atomic);

   GDSInstr *ir = nullptr;
   if (shader.chip_class() < ISA_CC_CAYMAN) {
      /* Pre-Cayman: value goes in the Y channel; base/uav_id are
       * carried by the instruction itself. */
      RegisterVec4 src(nullptr, src_as_register, nullptr, nullptr, pin_free);
      ir = new GDSInstr(op, dest, src, offset, uav_id);

   } else {
      /* Cayman: the byte address (4 * counter index) is computed into
       * channel X and the value moved to channel Y; the instruction
       * carries no resource offset.
       * NOTE(review): this 'dest' shadows the outer one and is created
       * even when !read_result — looks intentional for Cayman, but
       * worth confirming. */
      auto dest = vf.dest(instr->def, 0, pin_free);
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24,
                                              tmp[0],
                                              uav_id,
                                              vf.literal(4),
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(
            new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
      shader.emit_instruction(
         new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write));
      ir = new GDSInstr(op, dest, tmp, 0, nullptr);
   }
   shader.emit_instruction(ir);
   return true;
}
248 
/* Emit a GDS read of an atomic counter (also used for post_dec reads
 * via DS_OP_READ_RET on this path). */
bool
GDSInstr::emit_atomic_read(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   {
   }
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   auto dest = vf.dest(instr->def, 0, pin_free);

   GDSInstr *ir = nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      /* Pre-Cayman: no source operands needed; all channels unused. */
      RegisterVec4 src = RegisterVec4(0, true, {7, 7, 7, 7});
      ir = new GDSInstr(DS_OP_READ_RET, dest, src, offset, uav_id);
   } else {
      /* Cayman: compute the byte address (4 * counter index) into
       * channel X; the instruction carries no resource offset. */
      auto tmp = vf.temp_vec4(pin_group, {0, 7, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24,
                                              tmp[0],
                                              uav_id,
                                              vf.literal(4),
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(
            new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      ir = new GDSInstr(DS_OP_READ_RET, dest, tmp, 0, nullptr);
   }

   shader.emit_instruction(ir);
   return true;
}
285 
/* Emit an atomic counter increment as a GDS ADD of the shader's
 * per-lane update value (shader.atomic_update()). */
bool
GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   /* Write-only variant when the SSA result is never consumed. */
   bool read_result = !list_is_empty(&instr->def.uses);

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   {
   }
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   GDSInstr *ir = nullptr;
   auto dest = read_result ? vf.dest(instr->def, 0, pin_free) : nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      /* Pre-Cayman: update value in channel Y. */
      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
      ir =
         new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest, src, offset, uav_id);
   } else {
      /* Cayman: byte address in channel X, update value in channel Y. */
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});

      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24,
                                              tmp[0],
                                              uav_id,
                                              vf.literal(4),
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(
            new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      shader.emit_instruction(
         new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
      ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest, tmp, 0, nullptr);
   }
   shader.emit_instruction(ir);
   return true;
}
325 
/* Emit an atomic counter pre-decrement: GDS SUB of the update value,
 * then (if the result is used) subtract 1 in the ALU so the shader
 * sees the post-decrement value. */
bool
GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   bool read_result = !list_is_empty(&instr->def.uses);

   auto opcode = read_result ? DS_OP_SUB_RET : DS_OP_SUB;

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   {
   }
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));


   /* SUB_RET returns the pre-op value; stage it in a temp and fix it
    * up below. */
   auto *tmp_dest = read_result ? vf.temp_register() : nullptr;

   GDSInstr *ir = nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      /* Pre-Cayman: update value in channel Y. */
      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
      ir = new GDSInstr(opcode, tmp_dest, src, offset, uav_id);
   } else {
      /* Cayman: byte address in channel X, update value in channel Y. */
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24,
                                              tmp[0],
                                              uav_id,
                                              vf.literal(4),
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(
            new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      shader.emit_instruction(
         new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
      ir = new GDSInstr(opcode, tmp_dest, tmp, 0, nullptr);
   }

   shader.emit_instruction(ir);
   /* Convert the returned pre-op value into the pre-decrement result. */
   if (read_result)
      shader.emit_instruction(new AluInstr(op2_sub_int,
                                           vf.dest(instr->def, 0, pin_free),
                                           tmp_dest,
                                           vf.one_i(),
                                           AluInstr::last_write));
   return true;
}
375 
/* Replace the indirect resource offset register; the previous
 * register is not tracked here. */
void GDSInstr::update_indirect_addr(PRegister old_reg, PRegister addr)
{
   (void)old_reg;
   set_resource_offset(addr);
}
381 
/* Construct a RAT (Random Access Target) memory instruction.
 * cf_opcode selects the CF instruction (e.g. cf_mem_rat vs
 * cf_mem_rat_cacheless), rat_op the RAT operation, data the value
 * operands, index the address operands; rat_id (+ optional
 * rat_id_offset register for indirect access) selects the RAT. */
RatInstr::RatInstr(ECFOpCode cf_opcode,
                   ERatOp rat_op,
                   const RegisterVec4& data,
                   const RegisterVec4& index,
                   int rat_id,
                   PRegister rat_id_offset,
                   int burst_count,
                   int comp_mask,
                   int element_size):
    Resource(this, rat_id, rat_id_offset),
    m_cf_opcode(cf_opcode),
    m_rat_op(rat_op),
    m_data(data),
    m_index(index),
    m_burst_count(burst_count),
    m_comp_mask(comp_mask),
    m_element_size(element_size)
{
   /* Memory side effects: never dead-code eliminate. */
   set_always_keep();
   /* Track value dependencies for scheduling. */
   m_data.add_use(this);
   m_index.add_use(this);
}
404 
void
RatInstr::accept(ConstInstrVisitor& visitor) const
{
   /* Double-dispatch entry point for read-only instruction visitors. */
   visitor.visit(*this);
}
410 
void
RatInstr::accept(InstrVisitor& visitor)
{
   /* Double-dispatch entry point for mutating instruction visitors. */
   visitor.visit(this);
}
416 
bool
RatInstr::is_equal_to(const RatInstr& lhs) const
{
   /* Deliberately unimplemented: RAT instructions are never expected
    * to be compared for equality; trap in debug builds if they are. */
   (void)lhs;
   assert(0);
   return false;
}
424 
425 bool
do_ready() const426 RatInstr::do_ready() const
427 {
428    if (m_rat_op != STORE_TYPED) {
429       for (auto i : required_instr()) {
430          if (!i->is_scheduled()) {
431             return false;
432          }
433       }
434    }
435 
436    return m_data.ready(block_id(), index()) && m_index.ready(block_id(), index());
437 }
438 
void
RatInstr::do_print(std::ostream& os) const
{
   /* Debug dump: RAT id (+ indirect offset), address, operation, data,
    * burst count, component mask, element size, and ACK flag. */
   os << "MEM_RAT RAT " << resource_id();
   print_resource_offset(os);
   os << " @" << m_index;
   os << " OP:" << m_rat_op << " " << m_data;
   os << " BC:" << m_burst_count << " MASK:" << m_comp_mask << " ES:" << m_element_size;
   if (m_need_ack)
      os << " ACK";
}
450 
/* Replace the indirect resource offset register. */
void RatInstr::update_indirect_addr(UNUSED PRegister old_reg, PRegister addr)
{
   set_resource_offset(addr);
}
455 
456 static RatInstr::ERatOp
get_rat_opcode(const nir_atomic_op opcode)457 get_rat_opcode(const nir_atomic_op opcode)
458 {
459    switch (opcode) {
460    case nir_atomic_op_iadd:
461       return RatInstr::ADD_RTN;
462    case nir_atomic_op_iand:
463       return RatInstr::AND_RTN;
464    case nir_atomic_op_ior:
465       return RatInstr::OR_RTN;
466    case nir_atomic_op_imin:
467       return RatInstr::MIN_INT_RTN;
468    case nir_atomic_op_imax:
469       return RatInstr::MAX_INT_RTN;
470    case nir_atomic_op_umin:
471       return RatInstr::MIN_UINT_RTN;
472    case nir_atomic_op_umax:
473       return RatInstr::MAX_UINT_RTN;
474    case nir_atomic_op_ixor:
475       return RatInstr::XOR_RTN;
476    case nir_atomic_op_cmpxchg:
477       return RatInstr::CMPXCHG_INT_RTN;
478    case nir_atomic_op_xchg:
479       return RatInstr::XCHG_RTN;
480    default:
481       unreachable("Unsupported atomic");
482    }
483 }
484 
485 static RatInstr::ERatOp
get_rat_opcode_wo(const nir_atomic_op opcode)486 get_rat_opcode_wo(const nir_atomic_op opcode)
487 {
488    switch (opcode) {
489    case nir_atomic_op_iadd:
490       return RatInstr::ADD;
491    case nir_atomic_op_iand:
492       return RatInstr::AND;
493    case nir_atomic_op_ior:
494       return RatInstr::OR;
495    case nir_atomic_op_imin:
496       return RatInstr::MIN_INT;
497    case nir_atomic_op_imax:
498       return RatInstr::MAX_INT;
499    case nir_atomic_op_umin:
500       return RatInstr::MIN_UINT;
501    case nir_atomic_op_umax:
502       return RatInstr::MAX_UINT;
503    case nir_atomic_op_ixor:
504       return RatInstr::XOR;
505    case nir_atomic_op_cmpxchg:
506       return RatInstr::CMPXCHG_INT;
507    case nir_atomic_op_xchg:
508       return RatInstr::XCHG_RTN;
509    default:
510       unreachable("Unsupported atomic");
511    }
512 }
513 
514 bool
emit(nir_intrinsic_instr * intr,Shader & shader)515 RatInstr::emit(nir_intrinsic_instr *intr, Shader& shader)
516 {
517    switch (intr->intrinsic) {
518    case nir_intrinsic_load_ssbo:
519       return emit_ssbo_load(intr, shader);
520    case nir_intrinsic_store_ssbo:
521       return emit_ssbo_store(intr, shader);
522    case nir_intrinsic_ssbo_atomic:
523    case nir_intrinsic_ssbo_atomic_swap:
524       return emit_ssbo_atomic_op(intr, shader);
525    case nir_intrinsic_store_global:
526       return emit_global_store(intr, shader);
527    case nir_intrinsic_image_store:
528       return emit_image_store(intr, shader);
529    case nir_intrinsic_image_load:
530    case nir_intrinsic_image_atomic:
531    case nir_intrinsic_image_atomic_swap:
532       return emit_image_load_or_atomic(intr, shader);
533    case nir_intrinsic_image_size:
534       return emit_image_size(intr, shader);
535    case nir_intrinsic_image_samples:
536       return emit_image_samples(intr, shader);
537    case nir_intrinsic_get_ssbo_size:
538       return emit_ssbo_size(intr, shader);
539    default:
540       return false;
541    }
542 }
543 
/* Emit an SSBO load as a buffer fetch with a dword-aligned address. */
bool
RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto dest = vf.dest_vec4(intr->def, pin_group);

   /** src0 not used, should be some offset */
   auto addr = vf.src(intr->src[1], 0);
   auto addr_temp = vf.temp_register();

   /** Should be lowered in nir */
   /* Byte address -> dword index. */
   shader.emit_instruction(new AluInstr(
      op2_lshr_int, addr_temp, addr, vf.literal(2), {alu_write, alu_last_instr}));

   /* Fetch format and destination swizzle chosen by component count. */
   const EVTXDataFormat formats[4] = {fmt_32, fmt_32_32, fmt_32_32_32, fmt_32_32_32_32};

   RegisterVec4::Swizzle dest_swz[4] = {
      {0, 7, 7, 7},
      {0, 1, 7, 7},
      {0, 1, 2, 7},
      {0, 1, 2, 3}
   };

   int comp_idx = intr->def.num_components - 1;

   auto [offset, res_offset] = shader.evaluate_resource_offset(intr, 0);
   {
   }

   auto res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + offset + shader.ssbo_image_offset();

   auto ir = new LoadFromBuffer(
      dest, dest_swz[comp_idx], addr_temp, 0, res_id, res_offset, formats[comp_idx]);
   ir->set_fetch_flag(FetchInstr::use_tc);
   ir->set_num_format(vtx_nf_int);

   shader.emit_instruction(ir);
   return true;
}
583 
/* Emit a global-memory store as a cacheless raw RAT store. */
bool
RatInstr::emit_global_store(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto addr_orig = vf.src(intr->src[1], 0);
   auto addr_vec = vf.temp_vec4(pin_chan, {0, 7, 7, 7});

   /* Byte address -> dword index in channel X. */
   shader.emit_instruction(
      new AluInstr(op2_lshr_int, addr_vec[0], addr_orig, vf.literal(2),
                   AluInstr::last_write));

   /* Only channels selected by the NIR write mask get a value slot. */
   RegisterVec4::Swizzle value_swz = {0,7,7,7};
   auto mask = nir_intrinsic_write_mask(intr);
   for (int i = 0; i < 4; ++i) {
      if (mask & (1 << i))
         value_swz[i] = i;
   }

   auto value_vec = vf.temp_vec4(pin_chgr, value_swz);

   /* Copy the masked components; the final mov closes the ALU group. */
   AluInstr *ir = nullptr;
   for (int i = 0; i < 4; ++i) {
      if (value_swz[i] < 4) {
         ir = new AluInstr(op1_mov, value_vec[i],
                           vf.src(intr->src[0], i), AluInstr::write);
         shader.emit_instruction(ir);
      }
   }
   if (ir)
      ir->set_alu_flag(alu_last_instr);

   auto store = new RatInstr(cf_mem_rat_cacheless,
                             RatInstr::STORE_RAW,
                             value_vec,
                             addr_vec,
                             shader.ssbo_image_offset(),
                             nullptr,
                             1,
                             mask,
                             0);
   shader.emit_instruction(store);
   return true;
}
627 
/* Emit an SSBO store, one typed RAT store per written component. */
bool
RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto orig_addr = vf.src(instr->src[2], 0);

   auto addr_base = vf.temp_register();

   auto [offset, rat_id] = shader.evaluate_resource_offset(instr, 1);

   /* Byte address -> dword index. */
   shader.emit_instruction(
      new AluInstr(op2_lshr_int, addr_base, orig_addr, vf.literal(2), AluInstr::write));

   /* Store each component at addr_base + i with its own RAT store. */
   for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) {
      auto addr_vec = vf.temp_vec4(pin_group, {0, 1, 2, 7});
      if (i == 0) {
         shader.emit_instruction(
            new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::last_write));
      } else {
         shader.emit_instruction(new AluInstr(
            op2_add_int, addr_vec[0], addr_base, vf.literal(i), AluInstr::last_write));
      }
      auto value = vf.src(instr->src[0], i);
      PRegister v = vf.temp_register(0);
      shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::last_write));
      auto value_vec = RegisterVec4(v, nullptr, nullptr, nullptr, pin_chan);
      auto store = new RatInstr(cf_mem_rat,
                                RatInstr::STORE_TYPED,
                                value_vec,
                                addr_vec,
                                offset + shader.ssbo_image_offset(),
                                rat_id,
                                1,
                                1,
                                0);
      shader.emit_instruction(store);
   }

   return true;
}
668 
/* Emit an SSBO atomic (or atomic swap) as a RAT atomic; when the
 * result is used, read it back with an acked vertex fetch from the
 * RAT return address. */
bool
RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intr, 0);
   {
   }

   /* Write-only opcode when the SSA result is never consumed. */
   bool read_result = !list_is_empty(&intr->def.uses);
   auto opcode = read_result ? get_rat_opcode(nir_intrinsic_atomic_op(intr))
                             : get_rat_opcode_wo(nir_intrinsic_atomic_op(intr));

   auto coord_orig = vf.src(intr->src[1], 0);
   auto coord = vf.temp_register(0);

   auto data_vec4 = vf.temp_vec4(pin_chgr, {0, 1, 2, 3});

   /* Byte address -> dword index. */
   shader.emit_instruction(
      new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write));

   /* Channel Y carries the RAT return address for the result read-back. */
   shader.emit_instruction(
      new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));

   if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap) {
      /* cmpxchg: swap value in X; compare value goes in Z on Cayman,
       * W otherwise. */
      shader.emit_instruction(
         new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[3], 0), AluInstr::write));
      shader.emit_instruction(
         new AluInstr(op1_mov,
                      data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
                      vf.src(intr->src[2], 0),
                      {alu_last_instr, alu_write}));
   } else {
      shader.emit_instruction(new AluInstr(
         op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write));
   }

   RegisterVec4 out_vec(coord, coord, coord, coord, pin_chgr);

   auto atomic = new RatInstr(cf_mem_rat,
                              opcode,
                              data_vec4,
                              out_vec,
                              imageid + shader.ssbo_image_offset(),
                              image_offset,
                              1,
                              0xf,
                              0);
   shader.emit_instruction(atomic);

   atomic->set_ack();
   if (read_result) {
      /* Fetch the returned value; it must wait for the atomic's ack. */
      atomic->set_instr_flag(ack_rat_return_write);
      auto dest = vf.dest_vec4(intr->def, pin_group);

      auto fetch = new FetchInstr(vc_fetch,
                                  dest,
                                  {0, 1, 2, 3},
                                  shader.rat_return_address(),
                                  0,
                                  no_index_offset,
                                  fmt_32,
                                  vtx_nf_int,
                                  vtx_es_none,
                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                                  image_offset);
      fetch->set_mfc(15);
      fetch->set_fetch_flag(FetchInstr::srf_mode);
      fetch->set_fetch_flag(FetchInstr::use_tc);
      fetch->set_fetch_flag(FetchInstr::vpm);
      fetch->set_fetch_flag(FetchInstr::wait_ack);
      fetch->add_required_instr(atomic);
      shader.chain_ssbo_read(fetch);
      shader.emit_instruction(fetch);
   }

   return true;
}
746 
747 bool
emit_ssbo_size(nir_intrinsic_instr * intr,Shader & shader)748 RatInstr::emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader)
749 {
750    auto& vf = shader.value_factory();
751    auto dest = vf.dest_vec4(intr->def, pin_group);
752 
753    auto const_offset = nir_src_as_const_value(intr->src[0]);
754    int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
755    if (const_offset)
756       res_id += const_offset[0].u32;
757    else
758       assert(0 && "dynamic buffer offset not supported in buffer_size");
759 
760    shader.emit_instruction(new QueryBufferSizeInstr(dest, {0, 1, 2, 3}, res_id));
761    return true;
762 }
763 
/* Emit an image store as a typed RAT store. */
bool
RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);
   {
   }

   auto coord_load = vf.src_vec4(intrin->src[1], pin_chan);
   auto coord = vf.temp_vec4(pin_chgr);

   auto value_load = vf.src_vec4(intrin->src[3], pin_chan);
   auto value = vf.temp_vec4(pin_chgr);

   /* 1D arrays keep the layer in Z; swap Y/Z to match the hardware
    * coordinate layout. */
   RegisterVec4::Swizzle swizzle = {0, 1, 2, 3};
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin))
      swizzle = {0, 2, 1, 3};

   /* Copy coords and values into group-pinned temps. */
   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(
         new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags));
   }
   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags));
   }

   auto op = cf_mem_rat; // nir_intrinsic_access(intrin) & ACCESS_COHERENT ?
                         // cf_mem_rat_cacheless : cf_mem_rat;
   auto store = new RatInstr(
      op, RatInstr::STORE_TYPED, value, coord, imageid, image_offset, 1, 0xf, 0);

   store->set_ack();
   if (nir_intrinsic_access(intrin) & ACCESS_INCLUDE_HELPERS)
      store->set_instr_flag(Instr::helper);

   shader.emit_instruction(store);
   return true;
}
805 
/* Emit an image load or image atomic as a RAT op; the result (when
 * used) is read back via an acked vertex fetch in the image's format. */
bool
RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);
   {
   }

   /* Plain loads use NOP_RTN; atomics pick the _RTN or write-only
    * opcode depending on whether the result is consumed. */
   bool read_result = !list_is_empty(&intrin->def.uses);
   bool image_load = (intrin->intrinsic == nir_intrinsic_image_load);
   auto opcode = image_load  ? RatInstr::NOP_RTN :
                 read_result ? get_rat_opcode(nir_intrinsic_atomic_op(intrin))
                             : get_rat_opcode_wo(nir_intrinsic_atomic_op(intrin));

   auto coord_orig = vf.src_vec4(intrin->src[1], pin_chan);
   auto coord = vf.temp_vec4(pin_chgr);

   auto data_vec4 = vf.temp_vec4(pin_chgr, {0, 1, 2, 3});

   /* 1D arrays keep the layer in Z; swap Y/Z to match the hardware
    * coordinate layout. */
   RegisterVec4::Swizzle swizzle = {0, 1, 2, 3};
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin))
      swizzle = {0, 2, 1, 3};

   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(
         new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags));
   }

   /* Channel Y carries the RAT return address for the result read-back. */
   shader.emit_instruction(
      new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));

   if (intrin->intrinsic == nir_intrinsic_image_atomic_swap) {
      /* cmpxchg: swap value in X; compare value in Z on Cayman, W otherwise. */
      shader.emit_instruction(
         new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[4], 0), AluInstr::write));
      shader.emit_instruction(
         new AluInstr(op1_mov,
                      data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
                      vf.src(intrin->src[3], 0),
                      AluInstr::last_write));
   } else {
      shader.emit_instruction(
         new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[3], 0), AluInstr::write));
      shader.emit_instruction(
         new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write));
   }

   auto atomic =
      new RatInstr(cf_mem_rat, opcode, data_vec4, coord, imageid, image_offset, 1, 0xf, 0);
   shader.emit_instruction(atomic);

   atomic->set_ack();
   if (read_result) {
      /* Fetch the returned value using the image's vertex data format. */
      atomic->set_instr_flag(ack_rat_return_write);
      auto dest = vf.dest_vec4(intrin->def, pin_group);

      pipe_format format = nir_intrinsic_format(intrin);
      unsigned fmt = fmt_32;
      unsigned num_format = 0;
      unsigned format_comp = 0;
      unsigned endian = 0;
      r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);

      auto fetch = new FetchInstr(vc_fetch,
                                  dest,
                                  {0, 1, 2, 3},
                                  shader.rat_return_address(),
                                  0,
                                  no_index_offset,
                                  (EVTXDataFormat)fmt,
                                  (EVFetchNumFormat)num_format,
                                  (EVFetchEndianSwap)endian,
                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                                  image_offset);
      fetch->set_mfc(3);
      fetch->set_fetch_flag(FetchInstr::srf_mode);
      fetch->set_fetch_flag(FetchInstr::use_tc);
      fetch->set_fetch_flag(FetchInstr::vpm);
      fetch->set_fetch_flag(FetchInstr::wait_ack);
      if (format_comp)
         fetch->set_fetch_flag(FetchInstr::format_comp_signed);

      shader.chain_ssbo_read(fetch);
      shader.emit_instruction(fetch);
   }

   return true;
}
895 
896 #define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
897 
bool
RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
{
   /* Lower nir_intrinsic_image_size: buffer images use a buffer-size
    * query instruction, all other dimensionalities use a get_resinfo TEX
    * query; cube arrays additionally patch the layer count (z component)
    * from the buffer-info const buffer. Always returns true. */
   auto& vf = shader.value_factory();

   /* Placeholder source register for the resinfo query. */
   auto src = RegisterVec4(0, true, {4, 4, 4, 4});

   /* The LOD source is expected to be the constant zero here. */
   assert(nir_src_as_uint(intrin->src[1]) == 0);

   auto const_offset = nir_src_as_const_value(intrin->src[0]);
   PRegister dyn_offset = nullptr;

   /* Resource index: a constant image handle folds into res_id, an
    * indirect handle is loaded into a register for relative addressing. */
   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + nir_intrinsic_range_base(intrin);
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));

   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
      auto dest = vf.dest_vec4(intrin->def, pin_group);
      shader.emit_instruction(new QueryBufferSizeInstr(dest, {0, 1, 2, 3}, res_id));
      return true;
   } else {

      if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
          nir_intrinsic_image_array(intrin) &&
          intrin->def.num_components > 2) {
         /* Need to load the layers from a const buffer */

         auto dest = vf.dest_vec4(intrin->def, pin_group);
         /* z (swizzle 7, i.e. left unwritten) is filled in below from the
          * buffer-info const buffer instead of the resinfo result. */
         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo,
                                              dest,
                                              {0, 1, 7, 3},
                                              src,
                                              res_id,
                                              dyn_offset));

         shader.set_flag(Shader::sh_txs_cube_array_comp);

         if (const_offset) {
            /* Constant image index: the layer count lives at a fixed,
             * directly addressable location in the buffer-info const
             * buffer. */
            unsigned lookup_resid = const_offset[0].u32 + shader.image_size_const_offset();
            shader.emit_instruction(
               new AluInstr(op1_mov,
                            dest[2],
                            vf.uniform(lookup_resid / 4 + R600_SHADER_BUFFER_INFO_SEL,
                                       lookup_resid % 4,
                                       R600_BUFFER_INFO_CONST_BUFFER),
                            AluInstr::last_write));
         } else {
            /* If the addressing is indirect we have to get the z-value by
             * using a binary search */
            auto addr = vf.temp_register();
            auto comp1 = vf.temp_register();
            auto comp2 = vf.temp_register();
            auto low_bit = vf.temp_register();
            auto high_bit = vf.temp_register();

            auto trgt = vf.temp_vec4(pin_group);

            /* addr = index >> 2 selects the const-buffer vec4; the two
             * low bits of the index select the component within it. */
            shader.emit_instruction(new AluInstr(op2_lshr_int,
                                                 addr,
                                                 vf.src(intrin->src[0], 0),
                                                 vf.literal(2),
                                                 AluInstr::write));
            shader.emit_instruction(new AluInstr(op2_and_int,
                                                 low_bit,
                                                 vf.src(intrin->src[0], 0),
                                                 vf.one_i(),
                                                 AluInstr::write));
            shader.emit_instruction(new AluInstr(op2_and_int,
                                                 high_bit,
                                                 vf.src(intrin->src[0], 0),
                                                 vf.literal(2),
                                                 AluInstr::last_write));

            shader.emit_instruction(new LoadFromBuffer(trgt,
                                                       {0, 1, 2, 3},
                                                       addr,
                                                       R600_SHADER_BUFFER_INFO_SEL,
                                                       R600_BUFFER_INFO_CONST_BUFFER,
                                                       nullptr,
                                                       fmt_32_32_32_32_float));

            // this may be wrong
            /* Two-level select among the four fetched components using
             * the two low index bits (cnde_int: src0 == 0 picks src1,
             * otherwise src2). */
            shader.emit_instruction(new AluInstr(
               op3_cnde_int, comp1, high_bit, trgt[0], trgt[2], AluInstr::write));
            shader.emit_instruction(new AluInstr(
               op3_cnde_int, comp2, high_bit, trgt[1], trgt[3], AluInstr::last_write));
            shader.emit_instruction(new AluInstr(
               op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write));
         }
      } else {
         /* Common case: all components come straight from resinfo. */
         auto dest = vf.dest_vec4(intrin->def, pin_group);
         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo,
                                              dest,
                                              {0, 1, 2, 3},
                                              src,
                                              res_id,
                                              dyn_offset));
      }
   }
   return true;
}
1001 
1002 bool
emit_image_samples(nir_intrinsic_instr * intrin,Shader & shader)1003 RatInstr::emit_image_samples(nir_intrinsic_instr *intrin, Shader& shader)
1004 {
1005    auto& vf = shader.value_factory();
1006 
1007    auto src = RegisterVec4(0, true, {4, 4, 4, 4});
1008 
1009    auto tmp =  shader.value_factory().temp_vec4(pin_group);
1010    auto dest =  shader.value_factory().dest(intrin->def, 0, pin_free);
1011 
1012    auto const_offset = nir_src_as_const_value(intrin->src[0]);
1013    PRegister dyn_offset = nullptr;
1014 
1015    int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + nir_intrinsic_range_base(intrin);
1016    if (const_offset)
1017       res_id += const_offset[0].u32;
1018    else
1019       dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));
1020 
1021    shader.emit_instruction(new TexInstr(TexInstr::get_resinfo,
1022                                         tmp,
1023                                         {3, 7, 7, 7},
1024                                         src,
1025                                         res_id,
1026                                         dyn_offset));
1027 
1028    shader.emit_instruction(new AluInstr(op1_mov, dest, tmp[0], AluInstr::last_write));
1029    return true;
1030 }
1031 
1032 } // namespace r600
1033