• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_assembler.h"
28 
29 #include "../eg_sq.h"
30 #include "../r600_asm.h"
31 
32 #include "sfn_callstack.h"
33 #include "sfn_conditionaljumptracker.h"
34 #include "sfn_debug.h"
35 #include "sfn_instr_alugroup.h"
36 #include "sfn_instr_controlflow.h"
37 #include "sfn_instr_export.h"
38 #include "sfn_instr_fetch.h"
39 #include "sfn_instr_mem.h"
40 #include "sfn_instr_tex.h"
41 
42 namespace r600 {
Assembler(r600_shader * sh,const r600_shader_key & key)43 Assembler::Assembler(r600_shader *sh, const r600_shader_key& key):
44     m_sh(sh),
45     m_key(key)
46 {
47 }
48 
/* Lookup table keyed by ESDOp. Only declared here; the definition is not part
 * of this section.
 * NOTE(review): presumably maps DS opcodes to hardware opcode numbers --
 * confirm at the definition. */
49 extern const std::map<ESDOp, int> ds_opcode_map;
50 
51 class AssamblerVisitor : public ConstInstrVisitor {
52 public:
53    AssamblerVisitor(r600_shader *sh, const r600_shader_key& key, bool legacy_math_rules);
54 
55    void visit(const AluInstr& instr) override;
56    void visit(const AluGroup& instr) override;
57    void visit(const TexInstr& instr) override;
58    void visit(const ExportInstr& instr) override;
59    void visit(const FetchInstr& instr) override;
60    void visit(const Block& instr) override;
61    void visit(const IfInstr& instr) override;
62    void visit(const ControlFlowInstr& instr) override;
63    void visit(const ScratchIOInstr& instr) override;
64    void visit(const StreamOutInstr& instr) override;
65    void visit(const MemRingOutInstr& instr) override;
66    void visit(const EmitVertexInstr& instr) override;
67    void visit(const GDSInstr& instr) override;
68    void visit(const WriteTFInstr& instr) override;
69    void visit(const LDSAtomicInstr& instr) override;
70    void visit(const LDSReadInstr& instr) override;
71    void visit(const RatInstr& instr) override;
72 
73    void finalize();
74 
75    const uint32_t sf_vtx = 1;
76    const uint32_t sf_tex = 2;
77    const uint32_t sf_alu = 4;
78    const uint32_t sf_addr_register = 8;
79    const uint32_t sf_all = 0xf;
80 
81    void clear_states(const uint32_t& states);
82    bool copy_dst(r600_bytecode_alu_dst& dst, const Register& d, bool write);
83    PVirtualValue copy_src(r600_bytecode_alu_src& src, const VirtualValue& s);
84 
85    EBufferIndexMode emit_index_reg(const VirtualValue& addr, unsigned idx);
86 
87    void emit_endif();
88    void emit_else();
89    void emit_loop_begin(bool vpm);
90    void emit_loop_end();
91    void emit_loop_break();
92    void emit_loop_cont();
93 
94    void emit_alu_op(const AluInstr& ai);
95    void emit_lds_op(const AluInstr& lds);
96 
97    auto translate_for_mathrules(EAluOp op) -> EAluOp;
98 
99    void emit_wait_ack();
100 
101    /* Start initialized in constructor */
102    const r600_shader_key& m_key;
103    r600_shader *m_shader;
104    r600_bytecode *m_bc;
105 
106    ConditionalJumpTracker m_jump_tracker;
107    CallStack m_callstack;
108    bool ps_alpha_to_one;
109    /* End initialized in constructor */
110 
111    std::set<uint32_t> m_nliterals_in_group;
112    std::set<int> vtx_fetch_results;
113    std::set<int> tex_fetch_results;
114 
115    const VirtualValue *m_last_addr{nullptr};
116 
117    unsigned m_max_color_exports{0};
118    int m_loop_nesting{0};
119 
120    bool m_ack_suggested{false};
121    bool m_has_param_output{false};
122    bool m_has_pos_output{false};
123    bool m_last_op_was_barrier{false};
124    bool m_result{true};
125    bool m_legacy_math_rules{false};
126 };
127 
128 bool
lower(Shader * shader)129 Assembler::lower(Shader *shader)
130 {
131    AssamblerVisitor ass(m_sh, m_key, shader->has_flag(Shader::sh_legacy_math_rules));
132 
133    auto& blocks = shader->func();
134    for (auto b : blocks) {
135       b->accept(ass);
136       if (!ass.m_result)
137          return false;
138    }
139 
140    ass.finalize();
141 
142    return ass.m_result;
143 }
144 
AssamblerVisitor(r600_shader * sh,const r600_shader_key & key,bool legacy_math_rules)145 AssamblerVisitor::AssamblerVisitor(r600_shader *sh, const r600_shader_key& key,
146                                    bool legacy_math_rules):
147     m_key(key),
148     m_shader(sh),
149 
150     m_bc(&sh->bc),
151     m_callstack(sh->bc),
152     ps_alpha_to_one(key.ps.alpha_to_one),
153     m_legacy_math_rules(legacy_math_rules)
154 {
155    if (m_shader->processor_type == PIPE_SHADER_FRAGMENT)
156       m_max_color_exports = MAX2(m_key.ps.nr_cbufs, 1);
157 
158    if (m_shader->processor_type == PIPE_SHADER_VERTEX && m_shader->ninput > 0)
159       r600_bytecode_add_cfinst(m_bc, CF_OP_CALL_FS);
160 }
161 
162 void
finalize()163 AssamblerVisitor::finalize()
164 {
165    const struct cf_op_info *last = nullptr;
166 
167    if (m_bc->cf_last)
168       last = r600_isa_cf(m_bc->cf_last->op);
169 
170    /* alu clause instructions don't have EOP bit, so add NOP */
171    if (m_shader->bc.gfx_level < CAYMAN &&
172        (!last || last->flags & CF_ALU || m_bc->cf_last->op == CF_OP_LOOP_END ||
173         m_bc->cf_last->op == CF_OP_POP))
174       r600_bytecode_add_cfinst(m_bc, CF_OP_NOP);
175 
176    /* A fetch shader only can't be EOP (results in hang), but we can replace
177     * it by a NOP */
178    else if (last && m_bc->cf_last->op == CF_OP_CALL_FS)
179       m_bc->cf_last->op = CF_OP_NOP;
180 
181    if (m_shader->bc.gfx_level != CAYMAN)
182       m_bc->cf_last->end_of_program = 1;
183    else
184       cm_bytecode_add_cf_end(m_bc);
185 }
186 
/* Lookup table from EAluOp to the value emit_alu_op() stores in
 * r600_bytecode_alu::op. Only declared here; the definition is not part of
 * this section. */
187 extern const std::map<EAluOp, int> opcode_map;
188 
189 void
visit(const AluInstr & ai)190 AssamblerVisitor::visit(const AluInstr& ai)
191 {
192    assert(vtx_fetch_results.empty());
193    assert(tex_fetch_results.empty());
194 
195    if (unlikely(ai.has_alu_flag(alu_is_lds)))
196       emit_lds_op(ai);
197    else
198       emit_alu_op(ai);
199 }
200 
201 void
emit_lds_op(const AluInstr & lds)202 AssamblerVisitor::emit_lds_op(const AluInstr& lds)
203 {
204    struct r600_bytecode_alu alu;
205    memset(&alu, 0, sizeof(alu));
206 
207    alu.is_lds_idx_op = true;
208    alu.op = lds.lds_opcode();
209 
210    bool has_lds_fetch = false;
211    switch (alu.op) {
212    case LDS_WRITE:
213       alu.op = LDS_OP2_LDS_WRITE;
214       break;
215    case LDS_WRITE_REL:
216       alu.op = LDS_OP3_LDS_WRITE_REL;
217       alu.lds_idx = 1;
218       break;
219    case DS_OP_READ_RET:
220       alu.op = LDS_OP1_LDS_READ_RET;
221       FALLTHROUGH;
222    case LDS_ADD_RET:
223    case LDS_AND_RET:
224    case LDS_OR_RET:
225    case LDS_MAX_INT_RET:
226    case LDS_MAX_UINT_RET:
227    case LDS_MIN_INT_RET:
228    case LDS_MIN_UINT_RET:
229    case LDS_XOR_RET:
230    case LDS_XCHG_RET:
231    case LDS_CMP_XCHG_RET:
232       has_lds_fetch = true;
233       break;
234    case LDS_ADD:
235    case LDS_AND:
236    case LDS_OR:
237    case LDS_MAX_INT:
238    case LDS_MAX_UINT:
239    case LDS_MIN_INT:
240    case LDS_MIN_UINT:
241    case LDS_XOR:
242       break;
243    default:
244       std::cerr << "\n R600: error op: " << lds << "\n";
245       unreachable("Unhandled LDS op");
246    }
247 
248    copy_src(alu.src[0], lds.src(0));
249 
250    if (lds.n_sources() > 1)
251       copy_src(alu.src[1], lds.src(1));
252    else
253       alu.src[1].sel = V_SQ_ALU_SRC_0;
254 
255    if (lds.n_sources() > 2)
256       copy_src(alu.src[2], lds.src(2));
257    else
258       alu.src[2].sel = V_SQ_ALU_SRC_0;
259 
260    alu.last = lds.has_alu_flag(alu_last_instr);
261 
262    int r = r600_bytecode_add_alu(m_bc, &alu);
263    if (has_lds_fetch)
264       m_bc->cf_last->nlds_read++;
265 
266    if (r)
267       m_result = false;
268 }
269 
translate_for_mathrules(EAluOp op)270 auto AssamblerVisitor::translate_for_mathrules(EAluOp op) -> EAluOp
271 {
272    switch (op) {
273    case op2_dot_ieee: return op2_dot;
274    case op2_dot4_ieee: return op2_dot4;
275    case op2_mul_ieee: return op2_mul;
276    case op3_muladd_ieee : return op2_mul_ieee;
277    default:
278       return op;
279    }
280 }
281 
/* Translate one non-LDS ALU instruction into an r600_bytecode_alu and append
 * it to the bytecode, using the CF clause type requested by the instruction.
 *
 * m_result is set to false when the opcode has no entry in opcode_map or when
 * copy_dst() rejects the destination; otherwise it is overwritten with the
 * outcome of r600_bytecode_add_alu_type(). A group barrier that directly
 * follows another one is dropped without touching m_result. */
282 void
emit_alu_op(const AluInstr & ai)283 AssamblerVisitor::emit_alu_op(const AluInstr& ai)
284 {
285    sfn_log << SfnLog::assembly << "Emit ALU op " << ai << "\n";
286 
287    struct r600_bytecode_alu alu;
288    memset(&alu, 0, sizeof(alu));
289 
290    auto opcode = ai.opcode();
291 
   /* Remember which register gets loaded into the address register.
    * NOTE(review): alu was zeroed just above, so alu.dst.sel == 0 always holds
    * at this point and the branch is taken for every MOVA_INT -- confirm
    * whether the Cayman index register case was meant to be excluded here. */
292    if (unlikely(ai.opcode() == op1_mova_int &&
293                 (m_bc->gfx_level < CAYMAN || alu.dst.sel == 0))) {
294       m_last_addr = ai.psrc(0);
295       m_bc->ar_chan = m_last_addr->chan();
296       m_bc->ar_reg = m_last_addr->sel();
297    }
298 
   /* With legacy math rules the IEEE opcodes are swapped for other ones */
299    if (m_legacy_math_rules)
300        opcode = translate_for_mathrules(opcode);
301 
302    auto hw_opcode = opcode_map.find(opcode);
303 
304    if (hw_opcode == opcode_map.end()) {
305       std::cerr << "Opcode not handled for " << ai << "\n";
306       m_result = false;
307       return;
308    }
309 
310    // skip multiple barriers
311    if (m_last_op_was_barrier && opcode == op0_group_barrier)
312       return;
313 
314    m_last_op_was_barrier = opcode == op0_group_barrier;
315 
316    alu.op = hw_opcode->second;
317 
   /* MOVA_INT has no ordinary destination; on Cayman a destination with
    * sel > 0 is encoded as dst.sel = sel + 1. */
318    auto dst = ai.dest();
319    if (dst) {
320       if (ai.opcode() != op1_mova_int) {
321          if (!copy_dst(alu.dst, *dst, ai.has_alu_flag(alu_write))) {
322             m_result = false;
323             return;
324          }
325 
326          alu.dst.write = ai.has_alu_flag(alu_write);
327          alu.dst.clamp = ai.has_alu_flag(alu_dst_clamp);
328          alu.dst.rel = dst->addr() ? 1 : 0;
329       } else if (m_bc->gfx_level == CAYMAN && ai.dest()->sel() > 0) {
330          alu.dst.sel = ai.dest()->sel() + 1;
331       }
332    }
333 
334    alu.is_op3 = ai.n_sources() == 3;
335 
336    EBufferIndexMode kcache_index_mode = bim_none;
337    PVirtualValue buffer_offset = nullptr;
338 
   /* Copy the sources; the abs modifier is only applied to instructions
    * that are not three-source ones. The first source for which copy_src()
    * hands back a value selects the kcache index mode. */
339    for (unsigned i = 0; i < ai.n_sources(); ++i) {
340       buffer_offset = copy_src(alu.src[i], ai.src(i));
341       alu.src[i].neg = ai.has_source_mod(i, AluInstr::mod_neg);
342       if (!alu.is_op3)
343          alu.src[i].abs = ai.has_source_mod(i, AluInstr::mod_abs);
344 
345       if (buffer_offset && kcache_index_mode == bim_none) {
346          auto idx_reg = buffer_offset->as_register();
347          if (idx_reg && idx_reg->has_flag(Register::addr_or_idx)) {
348             switch (idx_reg->sel()) {
349             case 1: kcache_index_mode = bim_zero; break;
350             case 2: kcache_index_mode = bim_one; break;
351             default:
352                unreachable("Unsupported index mode");
353             }
354          } else {
355             kcache_index_mode = bim_zero;
356          }
357          alu.src[i].kc_rel = kcache_index_mode;
358       }
359 
      /* NOTE(review): this sits inside the source loop, so the pending LDS
       * read counter is decremented once per source of the instruction --
       * confirm that this is intended. */
360       if (ai.has_lds_queue_read()) {
361          assert(m_bc->cf_last->nlds_read > 0);
362          m_bc->cf_last->nlds_read--;
363       }
364    }
365 
366    if (ai.bank_swizzle() != alu_vec_unknown)
367       alu.bank_swizzle_force = ai.bank_swizzle();
368 
369    alu.last = ai.has_alu_flag(alu_last_instr);
370    alu.execute_mask = ai.has_alu_flag(alu_update_exec);
371 
372    /* If the destination register is equal to the last loaded address register
373     * then clear the latter one, because the values will no longer be
374     * identical */
   /* Only logging happens in the two statements below; nothing is cleared
    * here. */
375    if (m_last_addr)
376       sfn_log << SfnLog::assembly << "  Current address register is " << *m_last_addr
377               << "\n";
378 
379    if (dst)
380       sfn_log << SfnLog::assembly << "  Current dst register is " << *dst << "\n";
381 
382    auto cf_op = ai.cf_type();
383 
   /* Map the IR clause type to the CF opcode. The default case only asserts;
    * in builds without assertions type stays 0. */
384    unsigned type = 0;
385    switch (cf_op) {
386    case cf_alu:
387       type = CF_OP_ALU;
388       break;
389    case cf_alu_push_before:
390       type = CF_OP_ALU_PUSH_BEFORE;
391       break;
392    case cf_alu_pop_after:
393       type = CF_OP_ALU_POP_AFTER;
394       break;
395    case cf_alu_pop2_after:
396       type = CF_OP_ALU_POP2_AFTER;
397       break;
398    case cf_alu_break:
399       type = CF_OP_ALU_BREAK;
400       break;
401    case cf_alu_else_after:
402       type = CF_OP_ALU_ELSE_AFTER;
403       break;
404    case cf_alu_continue:
405       type = CF_OP_ALU_CONTINUE;
406       break;
407    case cf_alu_extended:
408       type = CF_OP_ALU_EXT;
409       break;
410    default:
411       assert(0 && "cf_alu_undefined should have been replaced");
412    }
413 
414    if (alu.last)
415       m_nliterals_in_group.clear();
416 
   /* This overwrites any earlier value of m_result */
417    m_result = !r600_bytecode_add_alu_type(m_bc, &alu, type);
418 
   /* Record what the MOVA_INT loaded: the address register, or on Cayman the
    * index register selected by dst.sel - 2. */
419    if (unlikely(ai.opcode() == op1_mova_int)) {
420       if (m_bc->gfx_level < CAYMAN || alu.dst.sel == 0) {
421          m_bc->ar_loaded = 1;
422       } else if (m_bc->gfx_level == CAYMAN) {
423          int idx = alu.dst.sel - 2;
424          m_bc->index_loaded[idx] = 1;
425          m_bc->index_reg[idx] = -1;
426       }
427    }
428 
   /* Track writes to clause-local registers, one bit per register channel */
429    if (alu.dst.sel >= g_clause_local_start && alu.dst.sel < g_clause_local_end) {
430       int clidx = 4 * (alu.dst.sel - g_clause_local_start) + alu.dst.chan;
431       m_bc->cf_last->clause_local_written |= 1 << clidx;
432    }
433 
434    if (ai.opcode() == op1_set_cf_idx0) {
435       m_bc->index_loaded[0] = 1;
436       m_bc->index_reg[0] = -1;
437    }
438 
439    if (ai.opcode() == op1_set_cf_idx1) {
440       m_bc->index_loaded[1] = 1;
441       m_bc->index_reg[1] = -1;
442    }
443 }
444 
/* Emit one ALU group.
 *
 * Pending vertex and texture fetch results are forgotten, then the group is
 * checked against the size limit of the current CF instruction (cf_last->ndw
 * plus two units per slot must not exceed slot_limit). Exceeding the limit
 * is treated as a programming error: it trips an assert, and only in builds
 * without assertions a new CF instruction is forced and the cached address
 * register is forgotten. Afterwards the address or index register needed by
 * the group is loaded if required, and every occupied slot is visited. */
445 void
visit(const AluGroup & group)446 AssamblerVisitor::visit(const AluGroup& group)
447 {
448    clear_states(sf_vtx | sf_tex);
449 
450    if (group.slots() == 0)
451       return;
452 
453    static const unsigned slot_limit = 256;
454 
455    if (m_bc->cf_last && !m_bc->force_add_cf) {
      /* For a group that starts an LDS group the check uses the slot
       * requirement reported by the first instruction. */
456       if (group.has_lds_group_start()) {
457          if (m_bc->cf_last->ndw + 2 * (*group.begin())->required_slots() > slot_limit) {
458             assert(m_bc->cf_last->nlds_read == 0);
459             assert(0 && "Not allowed to start new alu group here");
460             m_bc->force_add_cf = 1;
461             m_last_addr = nullptr;
462          }
463       } else {
464          if (m_bc->cf_last->ndw + 2 * group.slots() > slot_limit) {
465             std::cerr << "m_bc->cf_last->ndw = " << m_bc->cf_last->ndw
466                       << " group.slots() = " << group.slots()
467                       << " -> " << m_bc->cf_last->ndw + 2 * group.slots()
468                       << "> slot_limit = " << slot_limit << "\n";
469             assert(m_bc->cf_last->nlds_read == 0);
470             assert(0 && "Not allowed to start new alu group here");
471             m_bc->force_add_cf = 1;
472             m_last_addr = nullptr;
473          } else {
            /* A group barrier is checked against a fixed allowance of 14
             * units instead of the group's own slot count. */
474             auto instr = *group.begin();
475             if (instr && !instr->has_alu_flag(alu_is_lds) &&
476                 instr->opcode() == op0_group_barrier && m_bc->cf_last->ndw + 14 > slot_limit) {
477                assert(0 && "Not allowed to start new alu group here");
478                assert(m_bc->cf_last->nlds_read == 0);
479                m_bc->force_add_cf = 1;
480                m_last_addr = nullptr;
481             }
482          }
483       }
484    }
485 
486    auto [addr, is_index] = group.addr();
487 
   /* An address that does not carry the addr_or_idx flag still has to be
    * loaded: either via emit_index_reg() or into the address register, the
    * latter only if the value cached in m_last_addr is a different one or no
    * longer loaded. */
488    if (addr) {
489       if (!addr->has_flag(Register::addr_or_idx)) {
490          if (is_index) {
491             emit_index_reg(*addr, 0);
492          } else {
493             auto reg = addr->as_register();
494             assert(reg);
495             if (!m_last_addr || !m_bc->ar_loaded || !m_last_addr->equal_to(*reg)) {
496                m_last_addr = reg;
497                m_bc->ar_reg = reg->sel();
498                m_bc->ar_chan = reg->chan();
499                m_bc->ar_loaded = 0;
500                r600_load_ar(m_bc, group.addr_for_src());
501             }
502          }
503       }
504    }
505 
   /* Slots may be empty (null), those are skipped */
506    for (auto& i : group) {
507       if (i)
508          i->accept(*this);
509    }
510 }
511 
512 void
visit(const TexInstr & tex_instr)513 AssamblerVisitor::visit(const TexInstr& tex_instr)
514 {
515    clear_states(sf_vtx | sf_alu);
516 
517    if (tex_fetch_results.find(tex_instr.src().sel()) != tex_fetch_results.end()) {
518       m_bc->force_add_cf = 1;
519       tex_fetch_results.clear();
520    }
521 
522    r600_bytecode_tex tex;
523    memset(&tex, 0, sizeof(struct r600_bytecode_tex));
524    tex.op = tex_instr.opcode();
525    tex.sampler_id = tex_instr.sampler_id();
526    tex.resource_id = tex_instr.resource_id();
527    tex.src_gpr = tex_instr.src().sel();
528    tex.dst_gpr = tex_instr.dst().sel();
529    tex.dst_sel_x = tex_instr.dest_swizzle(0);
530    tex.dst_sel_y = tex_instr.dest_swizzle(1);
531    tex.dst_sel_z = tex_instr.dest_swizzle(2);
532    tex.dst_sel_w = tex_instr.dest_swizzle(3);
533    tex.src_sel_x = tex_instr.src()[0]->chan();
534    tex.src_sel_y = tex_instr.src()[1]->chan();
535    tex.src_sel_z = tex_instr.src()[2]->chan();
536    tex.src_sel_w = tex_instr.src()[3]->chan();
537    tex.coord_type_x = !tex_instr.has_tex_flag(TexInstr::x_unnormalized);
538    tex.coord_type_y = !tex_instr.has_tex_flag(TexInstr::y_unnormalized);
539    tex.coord_type_z = !tex_instr.has_tex_flag(TexInstr::z_unnormalized);
540    tex.coord_type_w = !tex_instr.has_tex_flag(TexInstr::w_unnormalized);
541    tex.offset_x = tex_instr.get_offset(0);
542    tex.offset_y = tex_instr.get_offset(1);
543    tex.offset_z = tex_instr.get_offset(2);
544    tex.resource_index_mode = tex_instr.resource_index_mode();
545    tex.sampler_index_mode = tex_instr.sampler_index_mode();
546 
547    if (tex.dst_sel_x < 4 && tex.dst_sel_y < 4 && tex.dst_sel_z < 4 && tex.dst_sel_w < 4)
548       tex_fetch_results.insert(tex.dst_gpr);
549 
550    if (tex_instr.opcode() == TexInstr::get_gradient_h ||
551        tex_instr.opcode() == TexInstr::get_gradient_v)
552       tex.inst_mod = tex_instr.has_tex_flag(TexInstr::grad_fine) ? 1 : 0;
553    else
554       tex.inst_mod = tex_instr.inst_mode();
555    if (r600_bytecode_add_tex(m_bc, &tex)) {
556       R600_ASM_ERR("shader_from_nir: Error creating tex assembly instruction\n");
557       m_result = false;
558    }
559 }
560 
561 void
visit(const ExportInstr & exi)562 AssamblerVisitor::visit(const ExportInstr& exi)
563 {
564    const auto& value = exi.value();
565 
566    r600_bytecode_output output;
567    memset(&output, 0, sizeof(output));
568 
569    output.gpr = value.sel();
570    output.elem_size = 3;
571    output.swizzle_x = value[0]->chan();
572    output.swizzle_y = value[1]->chan();
573    output.swizzle_z = value[2]->chan();
574    output.burst_count = 1;
575    output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE : CF_OP_EXPORT;
576    output.type = exi.export_type();
577 
578    clear_states(sf_all);
579    switch (exi.export_type()) {
580    case ExportInstr::pixel:
581       output.swizzle_w = ps_alpha_to_one ? 5 : exi.value()[3]->chan();
582       output.array_base = exi.location();
583       break;
584    case ExportInstr::pos:
585       output.swizzle_w = exi.value()[3]->chan();
586       output.array_base = 60 + exi.location();
587       break;
588    case ExportInstr::param:
589       output.swizzle_w = exi.value()[3]->chan();
590       output.array_base = exi.location();
591       break;
592    default:
593       R600_ASM_ERR("shader_from_nir: export %d type not yet supported\n",
594                    exi.export_type());
595       m_result = false;
596    }
597 
598    /* If all register elements pinned to fixed values
599     * we can override the gpr (the register allocator doesn't see
600     * this because it doesn't take these channels into account. */
601    if (output.swizzle_x > 3 && output.swizzle_y > 3 && output.swizzle_z > 3 &&
602        output.swizzle_w > 3)
603       output.gpr = 0;
604 
605    int r = 0;
606    if ((r = r600_bytecode_add_output(m_bc, &output))) {
607       R600_ASM_ERR("Error adding export at location %d : err: %d\n", exi.location(), r);
608       m_result = false;
609    }
610 }
611 
612 void
visit(const ScratchIOInstr & instr)613 AssamblerVisitor::visit(const ScratchIOInstr& instr)
614 {
615    clear_states(sf_all);
616 
617    struct r600_bytecode_output cf;
618 
619    memset(&cf, 0, sizeof(struct r600_bytecode_output));
620 
621    cf.op = CF_OP_MEM_SCRATCH;
622    cf.elem_size = 3;
623    cf.gpr = instr.value().sel();
624    cf.mark = !instr.is_read();
625    cf.comp_mask = instr.is_read() ? 0xf : instr.write_mask();
626    cf.swizzle_x = 0;
627    cf.swizzle_y = 1;
628    cf.swizzle_z = 2;
629    cf.swizzle_w = 3;
630    cf.burst_count = 1;
631 
632    assert(!instr.is_read() || m_bc->gfx_level < R700);
633 
634    if (instr.address()) {
635       cf.type = instr.is_read() || m_bc->gfx_level > R600 ? 3 : 1;
636       cf.index_gpr = instr.address()->sel();
637 
638       /* The docu seems to be wrong here: In indirect addressing the
639        * address_base seems to be the array_size */
640       cf.array_size = instr.array_size();
641    } else {
642       cf.type = instr.is_read() || m_bc->gfx_level > R600 ? 2 : 0;
643       cf.array_base = instr.location();
644    }
645 
646    if (r600_bytecode_add_output(m_bc, &cf)) {
647       R600_ASM_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
648       m_result = false;
649    }
650 }
651 
652 void
visit(const StreamOutInstr & instr)653 AssamblerVisitor::visit(const StreamOutInstr& instr)
654 {
655    struct r600_bytecode_output output;
656    memset(&output, 0, sizeof(struct r600_bytecode_output));
657 
658    output.gpr = instr.value().sel();
659    output.elem_size = instr.element_size();
660    output.array_base = instr.array_base();
661    output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
662    output.burst_count = instr.burst_count();
663    output.array_size = instr.array_size();
664    output.comp_mask = instr.comp_mask();
665    output.op = instr.op(m_shader->bc.gfx_level);
666 
667    if (r600_bytecode_add_output(m_bc, &output)) {
668       R600_ASM_ERR("shader_from_nir: Error creating stream output instruction\n");
669       m_result = false;
670    }
671 }
672 
673 void
visit(const MemRingOutInstr & instr)674 AssamblerVisitor::visit(const MemRingOutInstr& instr)
675 {
676    struct r600_bytecode_output output;
677    memset(&output, 0, sizeof(struct r600_bytecode_output));
678 
679    output.gpr = instr.value().sel();
680    output.type = instr.type();
681    output.elem_size = 3;
682    output.comp_mask = 0xf;
683    output.burst_count = 1;
684    output.op = instr.op();
685    if (instr.type() == MemRingOutInstr::mem_write_ind ||
686        instr.type() == MemRingOutInstr::mem_write_ind_ack) {
687       output.index_gpr = instr.index_reg();
688       output.array_size = 0xfff;
689    }
690    output.array_base = instr.array_base();
691 
692    if (r600_bytecode_add_output(m_bc, &output)) {
693       R600_ASM_ERR("shader_from_nir: Error creating mem ring write instruction\n");
694       m_result = false;
695    }
696 }
697 
698 void
visit(const EmitVertexInstr & instr)699 AssamblerVisitor::visit(const EmitVertexInstr& instr)
700 {
701    int r = r600_bytecode_add_cfinst(m_bc, instr.op());
702    if (!r)
703       m_bc->cf_last->count = instr.stream();
704    else
705       m_result = false;
706    assert(m_bc->cf_last->count < 4);
707 }
708 
709 void
visit(const FetchInstr & fetch_instr)710 AssamblerVisitor::visit(const FetchInstr& fetch_instr)
711 {
712    bool use_tc =
713       fetch_instr.has_fetch_flag(FetchInstr::use_tc) || (m_bc->gfx_level == CAYMAN);
714 
715    auto clear_flags = use_tc ? sf_vtx : sf_tex;
716 
717    clear_states(clear_flags | sf_alu);
718 
719    if (fetch_instr.has_fetch_flag(FetchInstr::wait_ack))
720       emit_wait_ack();
721 
722 
723    if (!use_tc &&
724        vtx_fetch_results.find(fetch_instr.src().sel()) != vtx_fetch_results.end()) {
725       m_bc->force_add_cf = 1;
726       vtx_fetch_results.clear();
727    }
728 
729    if (fetch_instr.has_fetch_flag(FetchInstr::use_tc) &&
730        tex_fetch_results.find(fetch_instr.src().sel()) != tex_fetch_results.end()) {
731       m_bc->force_add_cf = 1;
732       tex_fetch_results.clear();
733    }
734 
735    if (use_tc)
736       tex_fetch_results.insert(fetch_instr.dst().sel());
737    else
738       vtx_fetch_results.insert(fetch_instr.dst().sel());
739 
740    struct r600_bytecode_vtx vtx;
741    memset(&vtx, 0, sizeof(vtx));
742    vtx.op = fetch_instr.opcode();
743    vtx.buffer_id = fetch_instr.resource_id();
744    vtx.fetch_type = fetch_instr.fetch_type();
745    vtx.src_gpr = fetch_instr.src().sel();
746    vtx.src_sel_x = fetch_instr.src().chan();
747    vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
748    vtx.dst_gpr = fetch_instr.dst().sel();
749    vtx.dst_sel_x = fetch_instr.dest_swizzle(0); /* SEL_X */
750    vtx.dst_sel_y = fetch_instr.dest_swizzle(1); /* SEL_Y */
751    vtx.dst_sel_z = fetch_instr.dest_swizzle(2); /* SEL_Z */
752    vtx.dst_sel_w = fetch_instr.dest_swizzle(3); /* SEL_W */
753    vtx.use_const_fields = fetch_instr.has_fetch_flag(FetchInstr::use_const_field);
754    vtx.data_format = fetch_instr.data_format();
755    vtx.num_format_all = fetch_instr.num_format(); /* NUM_FORMAT_SCALED */
756    vtx.format_comp_all = fetch_instr.has_fetch_flag(FetchInstr::format_comp_signed);
757    vtx.endian = fetch_instr.endian_swap();
758    vtx.buffer_index_mode = fetch_instr.resource_index_mode();
759    vtx.offset = fetch_instr.src_offset();
760    vtx.indexed = fetch_instr.has_fetch_flag(FetchInstr::indexed);
761    vtx.uncached = fetch_instr.has_fetch_flag(FetchInstr::uncached);
762    vtx.elem_size = fetch_instr.elm_size();
763    vtx.array_base = fetch_instr.array_base();
764    vtx.array_size = fetch_instr.array_size();
765    vtx.srf_mode_all = fetch_instr.has_fetch_flag(FetchInstr::srf_mode);
766 
767    if (fetch_instr.has_fetch_flag(FetchInstr::use_tc)) {
768       if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
769          R600_ASM_ERR("shader_from_nir: Error creating tex assembly instruction\n");
770          m_result = false;
771       }
772 
773    } else {
774       if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
775          R600_ASM_ERR("shader_from_nir: Error creating tex assembly instruction\n");
776          m_result = false;
777       }
778    }
779 
780    m_bc->cf_last->vpm =
781       (m_bc->type == PIPE_SHADER_FRAGMENT) && fetch_instr.has_fetch_flag(FetchInstr::vpm);
782    m_bc->cf_last->barrier = 1;
783 }
784 
785 void
visit(const WriteTFInstr & instr)786 AssamblerVisitor::visit(const WriteTFInstr& instr)
787 {
788    struct r600_bytecode_gds gds;
789 
790    auto& value = instr.value();
791 
792    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
793    gds.src_gpr = value.sel();
794    gds.src_sel_x = value[0]->chan();
795    gds.src_sel_y = value[1]->chan();
796    gds.src_sel_z = 4;
797    gds.dst_sel_x = 7;
798    gds.dst_sel_y = 7;
799    gds.dst_sel_z = 7;
800    gds.dst_sel_w = 7;
801    gds.op = FETCH_OP_TF_WRITE;
802 
803    if (r600_bytecode_add_gds(m_bc, &gds) != 0) {
804       m_result = false;
805       return;
806    }
807 
808    if (value[2]->chan() != 7) {
809       memset(&gds, 0, sizeof(struct r600_bytecode_gds));
810       gds.src_gpr = value.sel();
811       gds.src_sel_x = value[2]->chan();
812       gds.src_sel_y = value[3]->chan();
813       gds.src_sel_z = 4;
814       gds.dst_sel_x = 7;
815       gds.dst_sel_y = 7;
816       gds.dst_sel_z = 7;
817       gds.dst_sel_w = 7;
818       gds.op = FETCH_OP_TF_WRITE;
819 
820       if (r600_bytecode_add_gds(m_bc, &gds)) {
821          m_result = false;
822          return;
823       }
824    }
825 }
826 
827 void
visit(const RatInstr & instr)828 AssamblerVisitor::visit(const RatInstr& instr)
829 {
830    struct r600_bytecode_gds gds;
831 
832    /* The instruction writes to the retuen buffer location, and
833     * the value will actually be read back, so make sure all previous writes
834     * have been finished */
835    if (m_ack_suggested /*&& instr.has_instr_flag(Instr::ack_rat_return_write)*/)
836       emit_wait_ack();
837 
838    int rat_idx = instr.resource_id();
839 
840    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
841 
842    r600_bytecode_add_cfinst(m_bc, instr.cf_opcode());
843    auto cf = m_bc->cf_last;
844    cf->rat.id = rat_idx + m_shader->rat_base;
845    cf->rat.inst = instr.rat_op();
846    cf->rat.index_mode = instr.resource_index_mode();
847    cf->output.type = instr.need_ack() ? 3 : 1;
848    cf->output.gpr = instr.data_gpr();
849    cf->output.index_gpr = instr.index_gpr();
850    cf->output.comp_mask = instr.comp_mask();
851    cf->output.burst_count = instr.burst_count();
852    assert(instr.data_swz(0) == PIPE_SWIZZLE_X);
853    if (cf->rat.inst != RatInstr::STORE_TYPED) {
854       assert(instr.data_swz(1) == PIPE_SWIZZLE_Y ||
855              instr.data_swz(1) == PIPE_SWIZZLE_MAX);
856       assert(instr.data_swz(2) == PIPE_SWIZZLE_Z ||
857              instr.data_swz(2) == PIPE_SWIZZLE_MAX);
858    }
859 
860    cf->vpm = m_bc->type == PIPE_SHADER_FRAGMENT;
861    cf->barrier = 1;
862    cf->mark = instr.need_ack();
863    cf->output.elem_size = instr.elm_size();
864 
865    m_ack_suggested |= instr.need_ack();
866 }
867 
868 void
clear_states(const uint32_t & states)869 AssamblerVisitor::clear_states(const uint32_t& states)
870 {
871    if (states & sf_vtx)
872       vtx_fetch_results.clear();
873 
874    if (states & sf_tex)
875       tex_fetch_results.clear();
876 
877    if (states & sf_alu) {
878       m_last_op_was_barrier = false;
879       m_last_addr = nullptr;
880    }
881 }
882 
883 void
visit(const Block & block)884 AssamblerVisitor::visit(const Block& block)
885 {
886    if (block.empty())
887       return;
888 
889    if (block.has_instr_flag(Instr::force_cf)) {
890       m_bc->force_add_cf = 1;
891       m_bc->ar_loaded = 0;
892       m_last_addr = nullptr;
893    }
894    sfn_log << SfnLog::assembly << "Translate block  size: " << block.size()
895            << " new_cf:" << m_bc->force_add_cf << "\n";
896 
897    for (const auto& i : block) {
898       sfn_log << SfnLog::assembly << "Translate " << *i << " ";
899       i->accept(*this);
900       sfn_log << SfnLog::assembly << (m_result ? "good" : "fail") << "\n";
901 
902       if (!m_result)
903          break;
904    }
905 }
906 
907 void
visit(const IfInstr & instr)908 AssamblerVisitor::visit(const IfInstr& instr)
909 {
910    int elems = m_callstack.push(FC_PUSH_VPM);
911    bool needs_workaround = false;
912 
913    if (m_bc->gfx_level == CAYMAN && m_bc->stack.loop > 1)
914       needs_workaround = true;
915 
916    if (m_bc->gfx_level == EVERGREEN && m_bc->family != CHIP_HEMLOCK &&
917        m_bc->family != CHIP_CYPRESS && m_bc->family != CHIP_JUNIPER) {
918       unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
919       unsigned dmod2 = (elems) % m_bc->stack.entry_size;
920 
921       if (elems && (!dmod1 || !dmod2))
922          needs_workaround = true;
923    }
924 
925    auto pred = instr.predicate();
926    auto [addr, dummy0, dummy1] = pred->indirect_addr();
927    {
928    }
929    assert(!dummy1);
930    if (addr) {
931       if (!m_last_addr || !m_bc->ar_loaded || !m_last_addr->equal_to(*addr)) {
932          m_bc->ar_reg = addr->sel();
933          m_bc->ar_chan = addr->chan();
934          m_last_addr = addr;
935          m_bc->ar_loaded = 0;
936 
937          r600_load_ar(m_bc, true);
938       }
939    }
940 
941    if (needs_workaround) {
942       r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
943       m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
944       r600_bytecode_add_cfinst(m_bc, CF_OP_ALU);
945       pred->set_cf_type(cf_alu);
946    }
947 
948    clear_states(sf_tex | sf_vtx);
949    pred->accept(*this);
950 
951    r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
952    clear_states(sf_all);
953 
954    m_jump_tracker.push(m_bc->cf_last, jt_if);
955 }
956 
957 void
visit(const ControlFlowInstr & instr)958 AssamblerVisitor::visit(const ControlFlowInstr& instr)
959 {
960    clear_states(sf_all);
961    switch (instr.cf_type()) {
962    case ControlFlowInstr::cf_else:
963       emit_else();
964       break;
965    case ControlFlowInstr::cf_endif:
966       emit_endif();
967       break;
968    case ControlFlowInstr::cf_loop_begin: {
969       bool use_vpm = m_shader->processor_type == PIPE_SHADER_FRAGMENT &&
970                      instr.has_instr_flag(Instr::vpm) &&
971                      !instr.has_instr_flag(Instr::helper);
972       emit_loop_begin(use_vpm);
973       break;
974    }
975    case ControlFlowInstr::cf_loop_end:
976       emit_loop_end();
977       break;
978    case ControlFlowInstr::cf_loop_break:
979       emit_loop_break();
980       break;
981    case ControlFlowInstr::cf_loop_continue:
982       emit_loop_cont();
983       break;
984    case ControlFlowInstr::cf_wait_ack: {
985       int r = r600_bytecode_add_cfinst(m_bc, CF_OP_WAIT_ACK);
986       if (!r) {
987          m_bc->cf_last->cf_addr = 0;
988          m_bc->cf_last->barrier = 1;
989          m_ack_suggested = false;
990       } else {
991          m_result = false;
992       }
993    } break;
994    default:
995       unreachable("Unknown CF instruction type");
996    }
997 }
998 
999 void
visit(const GDSInstr & instr)1000 AssamblerVisitor::visit(const GDSInstr& instr)
1001 {
1002    struct r600_bytecode_gds gds;
1003 
1004    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
1005 
1006    gds.op = ds_opcode_map.at(instr.opcode());
1007    gds.uav_id = instr.resource_id();
1008    gds.uav_index_mode = instr.resource_index_mode();
1009    gds.src_gpr = instr.src().sel();
1010 
1011    gds.src_sel_x = instr.src()[0]->chan() < 7 ? instr.src()[0]->chan() : 4;
1012    gds.src_sel_y = instr.src()[1]->chan() < 7 ? instr.src()[1]->chan() : 4;
1013    gds.src_sel_z = instr.src()[2]->chan() < 7 ? instr.src()[2]->chan() : 4;
1014 
1015    gds.dst_sel_x = 7;
1016    gds.dst_sel_y = 7;
1017    gds.dst_sel_z = 7;
1018    gds.dst_sel_w = 7;
1019 
1020    if (instr.dest()) {
1021       gds.dst_gpr = instr.dest()->sel();
1022       switch (instr.dest()->chan()) {
1023       case 0:
1024          gds.dst_sel_x = 0;
1025          break;
1026       case 1:
1027          gds.dst_sel_y = 0;
1028          break;
1029       case 2:
1030          gds.dst_sel_z = 0;
1031          break;
1032       case 3:
1033          gds.dst_sel_w = 0;
1034       }
1035    }
1036 
1037    gds.src_gpr2 = 0;
1038    gds.alloc_consume = m_bc->gfx_level < CAYMAN ? 1 : 0; // Not Cayman
1039 
1040    int r = r600_bytecode_add_gds(m_bc, &gds);
1041    if (r) {
1042       m_result = false;
1043       return;
1044    }
1045    m_bc->cf_last->vpm = PIPE_SHADER_FRAGMENT == m_bc->type;
1046    m_bc->cf_last->barrier = 1;
1047 }
1048 
1049 void
visit(const LDSAtomicInstr & instr)1050 AssamblerVisitor::visit(const LDSAtomicInstr& instr)
1051 {
1052    (void)instr;
1053    unreachable("LDSAtomicInstr must be lowered to ALUInstr");
1054 }
1055 
1056 void
visit(const LDSReadInstr & instr)1057 AssamblerVisitor::visit(const LDSReadInstr& instr)
1058 {
1059    (void)instr;
1060    unreachable("LDSReadInstr must be lowered to ALUInstr");
1061 }
1062 
1063 EBufferIndexMode
emit_index_reg(const VirtualValue & addr,unsigned idx)1064 AssamblerVisitor::emit_index_reg(const VirtualValue& addr, unsigned idx)
1065 {
1066    assert(idx < 2);
1067 
1068    if (!m_bc->index_loaded[idx] || m_loop_nesting ||
1069        m_bc->index_reg[idx] != (unsigned)addr.sel() ||
1070        m_bc->index_reg_chan[idx] != (unsigned)addr.chan()) {
1071       struct r600_bytecode_alu alu;
1072 
1073       // Make sure MOVA is not last instr in clause
1074 
1075       if (!m_bc->cf_last || (m_bc->cf_last->ndw >> 1) >= 110)
1076          m_bc->force_add_cf = 1;
1077 
1078       if (m_bc->gfx_level != CAYMAN) {
1079 
1080          EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0;
1081 
1082          memset(&alu, 0, sizeof(alu));
1083          alu.op = opcode_map.at(op1_mova_int);
1084          alu.dst.chan = 0;
1085          alu.src[0].sel = addr.sel();
1086          alu.src[0].chan = addr.chan();
1087          alu.last = 1;
1088          sfn_log << SfnLog::assembly << "   mova_int, ";
1089          int r = r600_bytecode_add_alu(m_bc, &alu);
1090          if (r)
1091             return bim_invalid;
1092 
1093          alu.op = opcode_map.at(idxop);
1094          alu.dst.chan = 0;
1095          alu.src[0].sel = 0;
1096          alu.src[0].chan = 0;
1097          alu.last = 1;
1098          sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx;
1099          r = r600_bytecode_add_alu(m_bc, &alu);
1100          if (r)
1101             return bim_invalid;
1102       } else {
1103          memset(&alu, 0, sizeof(alu));
1104          alu.op = opcode_map.at(op1_mova_int);
1105          alu.dst.sel = idx == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
1106          alu.dst.chan = 0;
1107          alu.src[0].sel = addr.sel();
1108          alu.src[0].chan = addr.chan();
1109          alu.last = 1;
1110          sfn_log << SfnLog::assembly << "   mova_int, ";
1111          int r = r600_bytecode_add_alu(m_bc, &alu);
1112          if (r)
1113             return bim_invalid;
1114       }
1115 
1116       m_bc->ar_loaded = 0;
1117       m_bc->index_reg[idx] = addr.sel();
1118       m_bc->index_reg_chan[idx] = addr.chan();
1119       m_bc->index_loaded[idx] = true;
1120       m_bc->force_add_cf = 1;
1121       sfn_log << SfnLog::assembly << "\n";
1122    }
1123    return idx == 0 ? bim_zero : bim_one;
1124 }
1125 
1126 void
emit_else()1127 AssamblerVisitor::emit_else()
1128 {
1129    r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
1130    m_bc->cf_last->pop_count = 1;
1131    m_result &= m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
1132 }
1133 
1134 void
emit_endif()1135 AssamblerVisitor::emit_endif()
1136 {
1137    m_callstack.pop(FC_PUSH_VPM);
1138 
1139    unsigned force_pop = m_bc->force_add_cf;
1140    if (!force_pop) {
1141       int alu_pop = 3;
1142       if (m_bc->cf_last) {
1143          if (m_bc->cf_last->op == CF_OP_ALU)
1144             alu_pop = 0;
1145          else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
1146             alu_pop = 1;
1147       }
1148       alu_pop += 1;
1149       if (alu_pop == 1) {
1150          m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
1151          m_bc->force_add_cf = 1;
1152       } else {
1153          force_pop = 1;
1154       }
1155    }
1156 
1157    if (force_pop) {
1158       r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
1159       m_bc->cf_last->pop_count = 1;
1160       m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
1161    }
1162 
1163    m_result &= m_jump_tracker.pop(m_bc->cf_last, jt_if);
1164 }
1165 
1166 void
emit_loop_begin(bool vpm)1167 AssamblerVisitor::emit_loop_begin(bool vpm)
1168 {
1169    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
1170    m_bc->cf_last->vpm = vpm && m_bc->type == PIPE_SHADER_FRAGMENT;
1171    m_jump_tracker.push(m_bc->cf_last, jt_loop);
1172    m_callstack.push(FC_LOOP);
1173    ++m_loop_nesting;
1174 }
1175 
1176 void
emit_loop_end()1177 AssamblerVisitor::emit_loop_end()
1178 {
1179    if (m_ack_suggested) {
1180       emit_wait_ack();
1181       m_ack_suggested = false;
1182    }
1183 
1184    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
1185    m_callstack.pop(FC_LOOP);
1186    assert(m_loop_nesting);
1187    --m_loop_nesting;
1188    m_result |= m_jump_tracker.pop(m_bc->cf_last, jt_loop);
1189 }
1190 
1191 void
emit_loop_break()1192 AssamblerVisitor::emit_loop_break()
1193 {
1194    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
1195    m_result |= m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
1196 }
1197 
1198 void
emit_loop_cont()1199 AssamblerVisitor::emit_loop_cont()
1200 {
1201    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
1202    m_result |= m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
1203 }
1204 
1205 bool
copy_dst(r600_bytecode_alu_dst & dst,const Register & d,bool write)1206 AssamblerVisitor::copy_dst(r600_bytecode_alu_dst& dst, const Register& d, bool write)
1207 {
1208    if (write && d.sel() > g_clause_local_end) {
1209       R600_ASM_ERR("shader_from_nir: Don't support more then 123 GPRs + 4 clause "
1210                    "local, but try using %d\n",
1211                    d.sel());
1212       m_result = false;
1213       return false;
1214    }
1215 
1216    dst.sel = d.sel();
1217    dst.chan = d.chan();
1218 
1219    if (m_last_addr && m_last_addr->equal_to(d))
1220       m_last_addr = nullptr;
1221 
1222    for (int i = 0; i < 2; ++i) {
1223       /* Force emitting index register, if we didn't emit it yet, because
1224        * the register value will change now */
1225       if (dst.sel == m_bc->index_reg[i] && dst.chan == m_bc->index_reg_chan[i])
1226          m_bc->index_loaded[i] = false;
1227    }
1228 
1229    return true;
1230 }
1231 
1232 void
emit_wait_ack()1233 AssamblerVisitor::emit_wait_ack()
1234 {
1235    int r = r600_bytecode_add_cfinst(m_bc, CF_OP_WAIT_ACK);
1236    if (!r) {
1237       m_bc->cf_last->cf_addr = 0;
1238       m_bc->cf_last->barrier = 1;
1239       m_ack_suggested = false;
1240    } else
1241       m_result = false;
1242 }
1243 
1244 class EncodeSourceVisitor : public ConstRegisterVisitor {
1245 public:
1246    EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode *bc);
1247    void visit(const Register& value) override;
1248    void visit(const LocalArray& value) override;
1249    void visit(const LocalArrayValue& value) override;
1250    void visit(const UniformValue& value) override;
1251    void visit(const LiteralConstant& value) override;
1252    void visit(const InlineConstant& value) override;
1253 
1254    r600_bytecode_alu_src& src;
1255    r600_bytecode *m_bc;
1256    PVirtualValue m_buffer_offset{nullptr};
1257 };
1258 
1259 PVirtualValue
copy_src(r600_bytecode_alu_src & src,const VirtualValue & s)1260 AssamblerVisitor::copy_src(r600_bytecode_alu_src& src, const VirtualValue& s)
1261 {
1262 
1263    EncodeSourceVisitor visitor(src, m_bc);
1264    src.sel = s.sel();
1265    src.chan = s.chan();
1266 
1267    if (s.sel() >= g_clause_local_start && s.sel() < g_clause_local_end ) {
1268       assert(m_bc->cf_last);
1269       int clidx = 4 * (s.sel() - g_clause_local_start) + s.chan();
1270       /* Ensure that the clause local register was already written */
1271       assert(m_bc->cf_last->clause_local_written & (1 << clidx));
1272    }
1273 
1274    s.accept(visitor);
1275    return visitor.m_buffer_offset;
1276 }
1277 
EncodeSourceVisitor(r600_bytecode_alu_src & s,r600_bytecode * bc)1278 EncodeSourceVisitor::EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode *bc):
1279     src(s),
1280     m_bc(bc)
1281 {
1282 }
1283 
1284 void
visit(const Register & value)1285 EncodeSourceVisitor::visit(const Register& value)
1286 {
1287    assert(value.sel() < g_clause_local_end && "Only have 123 reisters + 4 clause local");
1288 }
1289 
1290 void
visit(const LocalArray & value)1291 EncodeSourceVisitor::visit(const LocalArray& value)
1292 {
1293    (void)value;
1294    unreachable("An array can't be a source register");
1295 }
1296 
1297 void
visit(const LocalArrayValue & value)1298 EncodeSourceVisitor::visit(const LocalArrayValue& value)
1299 {
1300    src.rel = value.addr() ? 1 : 0;
1301 }
1302 
1303 void
visit(const UniformValue & value)1304 EncodeSourceVisitor::visit(const UniformValue& value)
1305 {
1306    assert(value.sel() >= 512 && "Uniform values must have a sel >= 512");
1307    m_buffer_offset = value.buf_addr();
1308    src.kc_bank = value.kcache_bank();
1309 }
1310 
1311 void
visit(const LiteralConstant & value)1312 EncodeSourceVisitor::visit(const LiteralConstant& value)
1313 {
1314    src.value = value.value();
1315 }
1316 
1317 void
visit(const InlineConstant & value)1318 EncodeSourceVisitor::visit(const InlineConstant& value)
1319 {
1320    (void)value;
1321 }
1322 
1323 const std::map<EAluOp, int> opcode_map = {
1324 
1325    {op2_add,                       ALU_OP2_ADD                      },
1326    {op2_mul,                       ALU_OP2_MUL                      },
1327    {op2_mul_ieee,                  ALU_OP2_MUL_IEEE                 },
1328    {op2_max,                       ALU_OP2_MAX                      },
1329    {op2_min,                       ALU_OP2_MIN                      },
1330    {op2_max_dx10,                  ALU_OP2_MAX_DX10                 },
1331    {op2_min_dx10,                  ALU_OP2_MIN_DX10                 },
1332    {op2_sete,                      ALU_OP2_SETE                     },
1333    {op2_setgt,                     ALU_OP2_SETGT                    },
1334    {op2_setge,                     ALU_OP2_SETGE                    },
1335    {op2_setne,                     ALU_OP2_SETNE                    },
1336    {op2_sete_dx10,                 ALU_OP2_SETE_DX10                },
1337    {op2_setgt_dx10,                ALU_OP2_SETGT_DX10               },
1338    {op2_setge_dx10,                ALU_OP2_SETGE_DX10               },
1339    {op2_setne_dx10,                ALU_OP2_SETNE_DX10               },
1340    {op1_fract,                     ALU_OP1_FRACT                    },
1341    {op1_trunc,                     ALU_OP1_TRUNC                    },
1342    {op1_ceil,                      ALU_OP1_CEIL                     },
1343    {op1_rndne,                     ALU_OP1_RNDNE                    },
1344    {op1_floor,                     ALU_OP1_FLOOR                    },
1345    {op2_ashr_int,                  ALU_OP2_ASHR_INT                 },
1346    {op2_lshr_int,                  ALU_OP2_LSHR_INT                 },
1347    {op2_lshl_int,                  ALU_OP2_LSHL_INT                 },
1348    {op1_mov,                       ALU_OP1_MOV                      },
1349    {op0_nop,                       ALU_OP0_NOP                      },
1350    {op2_mul_64,                    ALU_OP2_MUL_64                   },
1351    {op1v_flt64_to_flt32,           ALU_OP1_FLT64_TO_FLT32           },
1352    {op1v_flt32_to_flt64,           ALU_OP1_FLT32_TO_FLT64           },
1353    {op2_prede_int,                 ALU_OP2_PRED_SETE_INT            },
1354    {op2_pred_setne_int,            ALU_OP2_PRED_SETNE_INT           },
1355    {op2_pred_setge_int,            ALU_OP2_PRED_SETGE_INT           },
1356    {op2_pred_setgt_int,            ALU_OP2_PRED_SETGT_INT           },
1357    {op2_pred_setgt_uint,           ALU_OP2_PRED_SETGT_UINT          },
1358    {op2_pred_setge_uint,           ALU_OP2_PRED_SETGE_UINT          },
1359    {op2_pred_sete,                 ALU_OP2_PRED_SETE                },
1360    {op2_pred_setgt,                ALU_OP2_PRED_SETGT               },
1361    {op2_pred_setge,                ALU_OP2_PRED_SETGE               },
1362    {op2_pred_setne,                ALU_OP2_PRED_SETNE               },
1363    {op0_pred_set_clr,              ALU_OP0_PRED_SET_CLR             },
1364    {op1_pred_set_restore,          ALU_OP1_PRED_SET_RESTORE         },
1365    {op2_pred_sete_push,            ALU_OP2_PRED_SETE_PUSH           },
1366    {op2_pred_setgt_push,           ALU_OP2_PRED_SETGT_PUSH          },
1367    {op2_pred_setge_push,           ALU_OP2_PRED_SETGE_PUSH          },
1368    {op2_pred_setne_push,           ALU_OP2_PRED_SETNE_PUSH          },
1369    {op2_kille,                     ALU_OP2_KILLE                    },
1370    {op2_killgt,                    ALU_OP2_KILLGT                   },
1371    {op2_killge,                    ALU_OP2_KILLGE                   },
1372    {op2_killne,                    ALU_OP2_KILLNE                   },
1373    {op2_and_int,                   ALU_OP2_AND_INT                  },
1374    {op2_or_int,                    ALU_OP2_OR_INT                   },
1375    {op2_xor_int,                   ALU_OP2_XOR_INT                  },
1376    {op1_not_int,                   ALU_OP1_NOT_INT                  },
1377    {op2_add_int,                   ALU_OP2_ADD_INT                  },
1378    {op2_sub_int,                   ALU_OP2_SUB_INT                  },
1379    {op2_max_int,                   ALU_OP2_MAX_INT                  },
1380    {op2_min_int,                   ALU_OP2_MIN_INT                  },
1381    {op2_max_uint,                  ALU_OP2_MAX_UINT                 },
1382    {op2_min_uint,                  ALU_OP2_MIN_UINT                 },
1383    {op2_sete_int,                  ALU_OP2_SETE_INT                 },
1384    {op2_setgt_int,                 ALU_OP2_SETGT_INT                },
1385    {op2_setge_int,                 ALU_OP2_SETGE_INT                },
1386    {op2_setne_int,                 ALU_OP2_SETNE_INT                },
1387    {op2_setgt_uint,                ALU_OP2_SETGT_UINT               },
1388    {op2_setge_uint,                ALU_OP2_SETGE_UINT               },
1389    {op2_killgt_uint,               ALU_OP2_KILLGT_UINT              },
1390    {op2_killge_uint,               ALU_OP2_KILLGE_UINT              },
1391    {op2_pred_setgt_int,            ALU_OP2_PRED_SETGT_INT           },
1392    {op2_pred_setge_int,            ALU_OP2_PRED_SETGE_INT           },
1393    {op2_pred_setne_int,            ALU_OP2_PRED_SETNE_INT           },
1394    {op2_kille_int,                 ALU_OP2_KILLE_INT                },
1395    {op2_killgt_int,                ALU_OP2_KILLGT_INT               },
1396    {op2_killge_int,                ALU_OP2_KILLGE_INT               },
1397    {op2_killne_int,                ALU_OP2_KILLNE_INT               },
1398    {op2_pred_sete_push_int,        ALU_OP2_PRED_SETE_PUSH_INT       },
1399    {op2_pred_setgt_push_int,       ALU_OP2_PRED_SETGT_PUSH_INT      },
1400    {op2_pred_setge_push_int,       ALU_OP2_PRED_SETGE_PUSH_INT      },
1401    {op2_pred_setne_push_int,       ALU_OP2_PRED_SETNE_PUSH_INT      },
1402    {op2_pred_setlt_push_int,       ALU_OP2_PRED_SETLT_PUSH_INT      },
1403    {op2_pred_setle_push_int,       ALU_OP2_PRED_SETLE_PUSH_INT      },
1404    {op1_flt_to_int,                ALU_OP1_FLT_TO_INT               },
1405    {op1_bfrev_int,                 ALU_OP1_BFREV_INT                },
1406    {op2_addc_uint,                 ALU_OP2_ADDC_UINT                },
1407    {op2_subb_uint,                 ALU_OP2_SUBB_UINT                },
1408    {op0_group_barrier,             ALU_OP0_GROUP_BARRIER            },
1409    {op0_group_seq_begin,           ALU_OP0_GROUP_SEQ_BEGIN          },
1410    {op0_group_seq_end,             ALU_OP0_GROUP_SEQ_END            },
1411    {op2_set_mode,                  ALU_OP2_SET_MODE                 },
1412    {op1_set_cf_idx0,               ALU_OP0_SET_CF_IDX0              },
1413    {op1_set_cf_idx1,               ALU_OP0_SET_CF_IDX1              },
1414    {op2_set_lds_size,              ALU_OP2_SET_LDS_SIZE             },
1415    {op1_exp_ieee,                  ALU_OP1_EXP_IEEE                 },
1416    {op1_log_clamped,               ALU_OP1_LOG_CLAMPED              },
1417    {op1_log_ieee,                  ALU_OP1_LOG_IEEE                 },
1418    {op1_recip_clamped,             ALU_OP1_RECIP_CLAMPED            },
1419    {op1_recip_ff,                  ALU_OP1_RECIP_FF                 },
1420    {op1_recip_ieee,                ALU_OP1_RECIP_IEEE               },
1421    {op1_recipsqrt_clamped,         ALU_OP1_RECIPSQRT_CLAMPED        },
1422    {op1_recipsqrt_ff,              ALU_OP1_RECIPSQRT_FF             },
1423    {op1_recipsqrt_ieee1,           ALU_OP1_RECIPSQRT_IEEE           },
1424    {op1_sqrt_ieee,                 ALU_OP1_SQRT_IEEE                },
1425    {op1_sin,                       ALU_OP1_SIN                      },
1426    {op1_cos,                       ALU_OP1_COS                      },
1427    {op2_mullo_int,                 ALU_OP2_MULLO_INT                },
1428    {op2_mulhi_int,                 ALU_OP2_MULHI_INT                },
1429    {op2_mullo_uint,                ALU_OP2_MULLO_UINT               },
1430    {op2_mulhi_uint,                ALU_OP2_MULHI_UINT               },
1431    {op1_recip_int,                 ALU_OP1_RECIP_INT                },
1432    {op1_recip_uint,                ALU_OP1_RECIP_UINT               },
1433    {op1_recip_64,                  ALU_OP2_RECIP_64                 },
1434    {op1_recip_clamped_64,          ALU_OP2_RECIP_CLAMPED_64         },
1435    {op1_recipsqrt_64,              ALU_OP2_RECIPSQRT_64             },
1436    {op1_recipsqrt_clamped_64,      ALU_OP2_RECIPSQRT_CLAMPED_64     },
1437    {op1_sqrt_64,                   ALU_OP2_SQRT_64                  },
1438    {op1_flt_to_uint,               ALU_OP1_FLT_TO_UINT              },
1439    {op1_int_to_flt,                ALU_OP1_INT_TO_FLT               },
1440    {op1_uint_to_flt,               ALU_OP1_UINT_TO_FLT              },
1441    {op2_bfm_int,                   ALU_OP2_BFM_INT                  },
1442    {op1_flt32_to_flt16,            ALU_OP1_FLT32_TO_FLT16           },
1443    {op1_flt16_to_flt32,            ALU_OP1_FLT16_TO_FLT32           },
1444    {op1_ubyte0_flt,                ALU_OP1_UBYTE0_FLT               },
1445    {op1_ubyte1_flt,                ALU_OP1_UBYTE1_FLT               },
1446    {op1_ubyte2_flt,                ALU_OP1_UBYTE2_FLT               },
1447    {op1_ubyte3_flt,                ALU_OP1_UBYTE3_FLT               },
1448    {op1_bcnt_int,                  ALU_OP1_BCNT_INT                 },
1449    {op1_ffbh_uint,                 ALU_OP1_FFBH_UINT                },
1450    {op1_ffbl_int,                  ALU_OP1_FFBL_INT                 },
1451    {op1_ffbh_int,                  ALU_OP1_FFBH_INT                 },
1452    {op1_flt_to_uint4,              ALU_OP1_FLT_TO_UINT4             },
1453    {op2_dot_ieee,                  ALU_OP2_DOT_IEEE                 },
1454    {op1_flt_to_int_rpi,            ALU_OP1_FLT_TO_INT_RPI           },
1455    {op1_flt_to_int_floor,          ALU_OP1_FLT_TO_INT_FLOOR         },
1456    {op2_mulhi_uint24,              ALU_OP2_MULHI_UINT24             },
1457    {op1_mbcnt_32hi_int,            ALU_OP1_MBCNT_32HI_INT           },
1458    {op1_offset_to_flt,             ALU_OP1_OFFSET_TO_FLT            },
1459    {op2_mul_uint24,                ALU_OP2_MUL_UINT24               },
1460    {op1_bcnt_accum_prev_int,       ALU_OP1_BCNT_ACCUM_PREV_INT      },
1461    {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
1462    {op2_sete_64,                   ALU_OP2_SETE_64                  },
1463    {op2_setne_64,                  ALU_OP2_SETNE_64                 },
1464    {op2_setgt_64,                  ALU_OP2_SETGT_64                 },
1465    {op2_setge_64,                  ALU_OP2_SETGE_64                 },
1466    {op2_min_64,                    ALU_OP2_MIN_64                   },
1467    {op2_max_64,                    ALU_OP2_MAX_64                   },
1468    {op2_dot4,                      ALU_OP2_DOT4                     },
1469    {op2_dot4_ieee,                 ALU_OP2_DOT4_IEEE                },
1470    {op2_cube,                      ALU_OP2_CUBE                     },
1471    {op1_max4,                      ALU_OP1_MAX4                     },
1472    {op1_frexp_64,                  ALU_OP1_FREXP_64                 },
1473    {op1_ldexp_64,                  ALU_OP2_LDEXP_64                 },
1474    {op1_fract_64,                  ALU_OP1_FRACT_64                 },
1475    {op2_pred_setgt_64,             ALU_OP2_PRED_SETGT_64            },
1476    {op2_pred_sete_64,              ALU_OP2_PRED_SETE_64             },
1477    {op2_pred_setge_64,             ALU_OP2_PRED_SETGE_64            },
1478    {op2_add_64,                    ALU_OP2_ADD_64                   },
1479    {op1_mova_int,                  ALU_OP1_MOVA_INT                 },
1480    {op1v_flt64_to_flt32,           ALU_OP1_FLT64_TO_FLT32           },
1481    {op1_flt32_to_flt64,            ALU_OP1_FLT32_TO_FLT64           },
1482    {op2_sad_accum_prev_uint,       ALU_OP2_SAD_ACCUM_PREV_UINT      },
1483    {op2_dot,                       ALU_OP2_DOT                      },
1484    {op1_mul_prev,                  ALU_OP1_MUL_PREV                 },
1485    {op1_mul_ieee_prev,             ALU_OP1_MUL_IEEE_PREV            },
1486    {op1_add_prev,                  ALU_OP1_ADD_PREV                 },
1487    {op2_muladd_prev,               ALU_OP2_MULADD_PREV              },
1488    {op2_muladd_ieee_prev,          ALU_OP2_MULADD_IEEE_PREV         },
1489    {op2_interp_xy,                 ALU_OP2_INTERP_XY                },
1490    {op2_interp_zw,                 ALU_OP2_INTERP_ZW                },
1491    {op2_interp_x,                  ALU_OP2_INTERP_X                 },
1492    {op2_interp_z,                  ALU_OP2_INTERP_Z                 },
1493    {op0_store_flags,               ALU_OP1_STORE_FLAGS              },
1494    {op1_load_store_flags,          ALU_OP1_LOAD_STORE_FLAGS         },
1495    {op0_lds_1a,                    ALU_OP2_LDS_1A                   },
1496    {op0_lds_1a1d,                  ALU_OP2_LDS_1A1D                 },
1497    {op0_lds_2a,                    ALU_OP2_LDS_2A                   },
1498    {op1_interp_load_p0,            ALU_OP1_INTERP_LOAD_P0           },
1499    {op1_interp_load_p10,           ALU_OP1_INTERP_LOAD_P10          },
1500    {op1_interp_load_p20,           ALU_OP1_INTERP_LOAD_P20          },
1501    {op3_bfe_uint,                  ALU_OP3_BFE_UINT                 },
1502    {op3_bfe_int,                   ALU_OP3_BFE_INT                  },
1503    {op3_bfi_int,                   ALU_OP3_BFI_INT                  },
1504    {op3_fma,                       ALU_OP3_FMA                      },
1505    {op3_cndne_64,                  ALU_OP3_CNDNE_64                 },
1506    {op3_fma_64,                    ALU_OP3_FMA_64                   },
1507    {op3_lerp_uint,                 ALU_OP3_LERP_UINT                },
1508    {op3_bit_align_int,             ALU_OP3_BIT_ALIGN_INT            },
1509    {op3_byte_align_int,            ALU_OP3_BYTE_ALIGN_INT           },
1510    {op3_sad_accum_uint,            ALU_OP3_SAD_ACCUM_UINT           },
1511    {op3_sad_accum_hi_uint,         ALU_OP3_SAD_ACCUM_HI_UINT        },
1512    {op3_muladd_uint24,             ALU_OP3_MULADD_UINT24            },
1513    {op3_lds_idx_op,                ALU_OP3_LDS_IDX_OP               },
1514    {op3_muladd,                    ALU_OP3_MULADD                   },
1515    {op3_muladd_m2,                 ALU_OP3_MULADD_M2                },
1516    {op3_muladd_m4,                 ALU_OP3_MULADD_M4                },
1517    {op3_muladd_d2,                 ALU_OP3_MULADD_D2                },
1518    {op3_muladd_ieee,               ALU_OP3_MULADD_IEEE              },
1519    {op3_cnde,                      ALU_OP3_CNDE                     },
1520    {op3_cndgt,                     ALU_OP3_CNDGT                    },
1521    {op3_cndge,                     ALU_OP3_CNDGE                    },
1522    {op3_cnde_int,                  ALU_OP3_CNDE_INT                 },
1523    {op3_cndgt_int,                 ALU_OP3_CNDGT_INT                },
1524    {op3_cndge_int,                 ALU_OP3_CNDGE_INT                },
1525    {op3_mul_lit,                   ALU_OP3_MUL_LIT                  },
1526 };
1527 
1528 const std::map<ESDOp, int> ds_opcode_map = {
1529    {DS_OP_ADD,                      FETCH_OP_GDS_ADD                 },
1530    {DS_OP_SUB,                      FETCH_OP_GDS_SUB                 },
1531    {DS_OP_RSUB,                     FETCH_OP_GDS_RSUB                },
1532    {DS_OP_INC,                      FETCH_OP_GDS_INC                 },
1533    {DS_OP_DEC,                      FETCH_OP_GDS_DEC                 },
1534    {DS_OP_MIN_INT,                  FETCH_OP_GDS_MIN_INT             },
1535    {DS_OP_MAX_INT,                  FETCH_OP_GDS_MAX_INT             },
1536    {DS_OP_MIN_UINT,                 FETCH_OP_GDS_MIN_UINT            },
1537    {DS_OP_MAX_UINT,                 FETCH_OP_GDS_MAX_UINT            },
1538    {DS_OP_AND,                      FETCH_OP_GDS_AND                 },
1539    {DS_OP_OR,                       FETCH_OP_GDS_OR                  },
1540    {DS_OP_XOR,                      FETCH_OP_GDS_XOR                 },
1541    {DS_OP_MSKOR,                    FETCH_OP_GDS_MSKOR               },
1542    {DS_OP_WRITE,                    FETCH_OP_GDS_WRITE               },
1543    {DS_OP_WRITE_REL,                FETCH_OP_GDS_WRITE_REL           },
1544    {DS_OP_WRITE2,                   FETCH_OP_GDS_WRITE2              },
1545    {DS_OP_CMP_STORE,                FETCH_OP_GDS_CMP_STORE           },
1546    {DS_OP_CMP_STORE_SPF,            FETCH_OP_GDS_CMP_STORE_SPF       },
1547    {DS_OP_BYTE_WRITE,               FETCH_OP_GDS_BYTE_WRITE          },
1548    {DS_OP_SHORT_WRITE,              FETCH_OP_GDS_SHORT_WRITE         },
1549    {DS_OP_ADD_RET,                  FETCH_OP_GDS_ADD_RET             },
1550    {DS_OP_SUB_RET,                  FETCH_OP_GDS_SUB_RET             },
1551    {DS_OP_RSUB_RET,                 FETCH_OP_GDS_RSUB_RET            },
1552    {DS_OP_INC_RET,                  FETCH_OP_GDS_INC_RET             },
1553    {DS_OP_DEC_RET,                  FETCH_OP_GDS_DEC_RET             },
1554    {DS_OP_MIN_INT_RET,              FETCH_OP_GDS_MIN_INT_RET         },
1555    {DS_OP_MAX_INT_RET,              FETCH_OP_GDS_MAX_INT_RET         },
1556    {DS_OP_MIN_UINT_RET,             FETCH_OP_GDS_MIN_UINT_RET        },
1557    {DS_OP_MAX_UINT_RET,             FETCH_OP_GDS_MAX_UINT_RET        },
1558    {DS_OP_AND_RET,                  FETCH_OP_GDS_AND_RET             },
1559    {DS_OP_OR_RET,                   FETCH_OP_GDS_OR_RET              },
1560    {DS_OP_XOR_RET,                  FETCH_OP_GDS_XOR_RET             },
1561    {DS_OP_MSKOR_RET,                FETCH_OP_GDS_MSKOR_RET           },
1562    {DS_OP_XCHG_RET,                 FETCH_OP_GDS_XCHG_RET            },
1563    {DS_OP_XCHG_REL_RET,             FETCH_OP_GDS_XCHG_REL_RET        },
1564    {DS_OP_XCHG2_RET,                FETCH_OP_GDS_XCHG2_RET           },
1565    {DS_OP_CMP_XCHG_RET,             FETCH_OP_GDS_CMP_XCHG_RET        },
1566    {DS_OP_CMP_XCHG_SPF_RET,         FETCH_OP_GDS_CMP_XCHG_SPF_RET    },
1567    {DS_OP_READ_RET,                 FETCH_OP_GDS_READ_RET            },
1568    {DS_OP_READ_REL_RET,             FETCH_OP_GDS_READ_REL_RET        },
1569    {DS_OP_READ2_RET,                FETCH_OP_GDS_READ2_RET           },
1570    {DS_OP_READWRITE_RET,            FETCH_OP_GDS_READWRITE_RET       },
1571    {DS_OP_BYTE_READ_RET,            FETCH_OP_GDS_BYTE_READ_RET       },
1572    {DS_OP_UBYTE_READ_RET,           FETCH_OP_GDS_UBYTE_READ_RET      },
1573    {DS_OP_SHORT_READ_RET,           FETCH_OP_GDS_SHORT_READ_RET      },
1574    {DS_OP_USHORT_READ_RET,          FETCH_OP_GDS_USHORT_READ_RET     },
1575    {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
1576    {DS_OP_INVALID,                  0                                },
1577 };
1578 
1579 } // namespace r600
1580