• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_assembler.h"
28 #include "sfn_debug.h"
29 #include "sfn_instr_alugroup.h"
30 #include "sfn_instr_controlflow.h"
31 #include "sfn_instr_fetch.h"
32 #include "sfn_instr_export.h"
33 #include "sfn_instr_mem.h"
34 #include "sfn_instr_tex.h"
35 
36 #include "sfn_conditionaljumptracker.h"
37 #include "sfn_callstack.h"
38 
39 #include "../eg_sq.h"
40 
41 namespace r600 {
Assembler(r600_shader * sh,const r600_shader_key & key)42 Assembler::Assembler(r600_shader *sh, const r600_shader_key& key):
43    m_sh(sh), m_key(key)
44 {
45 }
46 
47 extern const std::map<ESDOp, int> ds_opcode_map;
48 
49 class AssamblerVisitor : public ConstInstrVisitor {
50 public:
51    AssamblerVisitor(r600_shader *sh, const r600_shader_key& key);
52 
53    void visit(const AluInstr& instr) override;
54    void visit(const AluGroup& instr) override;
55    void visit(const TexInstr& instr) override;
56    void visit(const ExportInstr& instr) override;
57    void visit(const FetchInstr& instr) override;
58    void visit(const Block& instr) override;
59    void visit(const IfInstr& instr) override;
60    void visit(const ControlFlowInstr& instr) override;
61    void visit(const ScratchIOInstr& instr) override;
62    void visit(const StreamOutInstr& instr) override;
63    void visit(const MemRingOutInstr& instr) override;
64    void visit(const EmitVertexInstr& instr) override;
65    void visit(const GDSInstr& instr) override;
66    void visit(const WriteTFInstr& instr) override;
67    void visit(const LDSAtomicInstr& instr) override;
68    void visit(const LDSReadInstr& instr) override;
69    void visit(const RatInstr& instr) override;
70 
71    void finalize();
72 
73    const uint32_t sf_vtx = 1;
74    const uint32_t sf_tex = 2;
75    const uint32_t sf_alu = 4;
76    const uint32_t sf_addr_register = 8;
77    const uint32_t sf_all = 0xf;
78 
79    void clear_states(const uint32_t& states);
80    bool copy_dst(r600_bytecode_alu_dst& dst, const Register& d, bool write);
81    PVirtualValue copy_src(r600_bytecode_alu_src& src, const VirtualValue& s);
82 
83    EBufferIndexMode
84    emit_index_reg(const VirtualValue& addr, unsigned idx);
85 
86    void emit_endif();
87    void emit_else();
88    void emit_loop_begin(bool vpm);
89    void emit_loop_end();
90    void emit_loop_break();
91    void emit_loop_cont();
92 
93    void emit_alu_op(const AluInstr& ai);
94    void emit_lds_op(const AluInstr& lds);
95 
96    void emit_wait_ack();
97 
98    /* Start initialized in constructor */
99    const r600_shader_key& m_key;
100    r600_shader *m_shader;
101    r600_bytecode *m_bc;
102 
103    ConditionalJumpTracker m_jump_tracker;
104    CallStack m_callstack;
105    bool ps_alpha_to_one;
106    /* End initialized in constructor */
107 
108    std::set<uint32_t> m_nliterals_in_group;
109    std::set<int> vtx_fetch_results;
110    std::set<int> tex_fetch_results;
111 
112    PRegister m_last_addr{nullptr};
113 
114    unsigned m_max_color_exports{0};
115    int m_loop_nesting{0};
116 
117    bool m_ack_suggested{false};
118    bool m_has_param_output{false};
119    bool m_has_pos_output{false};
120    bool m_last_op_was_barrier{false};
121    bool m_result{true};
122 };
123 
lower(Shader * shader)124 bool Assembler::lower(Shader *shader)
125 {
126    AssamblerVisitor ass(m_sh, m_key);
127 
128    auto& blocks = shader->func();
129    for (auto b : blocks) {
130       b->accept(ass);
131       if (!ass.m_result)
132          return false;
133    }
134 
135    ass.finalize();
136 
137    return ass.m_result;
138 
139 }
140 
AssamblerVisitor(r600_shader * sh,const r600_shader_key & key)141 AssamblerVisitor::AssamblerVisitor(r600_shader *sh, const r600_shader_key& key):
142    m_key(key),
143    m_shader(sh),
144 
145    m_bc(&sh->bc),
146    m_callstack(sh->bc),
147    ps_alpha_to_one(key.ps.alpha_to_one)
148 {
149    if (m_shader->processor_type == PIPE_SHADER_FRAGMENT)
150       m_max_color_exports = MAX2(m_key.ps.nr_cbufs, 1);
151 
152    if (m_shader->processor_type == PIPE_SHADER_VERTEX &&
153        m_shader->ninput > 0)
154          r600_bytecode_add_cfinst(m_bc, CF_OP_CALL_FS);
155 }
156 
finalize()157 void AssamblerVisitor::finalize()
158 {
159    const struct cf_op_info *last = nullptr;
160 
161    if (m_bc->cf_last)
162       last = r600_isa_cf(m_bc->cf_last->op);
163 
164    /* alu clause instructions don't have EOP bit, so add NOP */
165    if (m_shader->bc.gfx_level < CAYMAN &&
166        (!last || last->flags & CF_ALU || m_bc->cf_last->op == CF_OP_LOOP_END
167        || m_bc->cf_last->op == CF_OP_POP))
168       r600_bytecode_add_cfinst(m_bc, CF_OP_NOP);
169 
170    /* A fetch shader only can't be EOP (results in hang), but we can replace it
171         * by a NOP */
172    else if (last && m_bc->cf_last->op == CF_OP_CALL_FS)
173       m_bc->cf_last->op = CF_OP_NOP;
174 
175    if (m_shader->bc.gfx_level != CAYMAN)
176       m_bc->cf_last->end_of_program = 1;
177    else
178       cm_bytecode_add_cf_end(m_bc);
179 }
180 
181 extern const std::map<EAluOp, int> opcode_map;
182 
visit(const AluInstr & ai)183 void AssamblerVisitor::visit(const AluInstr& ai)
184 {
185    assert(vtx_fetch_results.empty());
186    assert(tex_fetch_results.empty());
187 
188    if (unlikely(ai.has_alu_flag(alu_is_lds)))
189       emit_lds_op(ai);
190    else
191       emit_alu_op(ai);
192 }
193 
emit_lds_op(const AluInstr & lds)194 void AssamblerVisitor::emit_lds_op(const AluInstr& lds)
195 {
196    struct r600_bytecode_alu alu;
197    memset(&alu, 0, sizeof(alu));
198 
199    alu.is_lds_idx_op = true;
200    alu.op = lds.lds_opcode();
201 
202    bool has_lds_fetch = false;
203    switch (alu.op) {
204    case LDS_WRITE:
205       alu.op =LDS_OP2_LDS_WRITE;
206       break;
207    case LDS_WRITE_REL:
208       alu.op = LDS_OP3_LDS_WRITE_REL;
209       alu.lds_idx = 1;
210       break;
211    case DS_OP_READ_RET:
212       alu.op = LDS_OP1_LDS_READ_RET;
213       FALLTHROUGH;
214    case LDS_ADD_RET:
215    case LDS_AND_RET:
216    case LDS_OR_RET:
217    case LDS_MAX_INT_RET:
218    case LDS_MAX_UINT_RET:
219    case LDS_MIN_INT_RET:
220    case LDS_MIN_UINT_RET:
221    case LDS_XOR_RET:
222    case LDS_XCHG_RET:
223    case LDS_CMP_XCHG_RET:
224       has_lds_fetch = true;
225       break;
226    case LDS_ADD:
227    case LDS_AND:
228    case LDS_OR:
229    case LDS_MAX_INT:
230    case LDS_MAX_UINT:
231    case LDS_MIN_INT:
232    case LDS_MIN_UINT:
233    case LDS_XOR:
234       break;
235    default:
236       std::cerr << "\n R600: error op: " << lds << "\n";
237       unreachable("Unhandled LDS op");
238    }
239 
240    copy_src(alu.src[0], lds.src(0));
241 
242    if (lds.n_sources() > 1)
243       copy_src(alu.src[1], lds.src(1));
244    else
245       alu.src[1].sel = V_SQ_ALU_SRC_0;
246 
247    if (lds.n_sources() > 2)
248       copy_src(alu.src[2], lds.src(2));
249    else
250       alu.src[2].sel = V_SQ_ALU_SRC_0;
251 
252    alu.last = lds.has_alu_flag(alu_last_instr);
253 
254    int r = r600_bytecode_add_alu(m_bc, &alu);
255    if (has_lds_fetch)
256       m_bc->cf_last->nlds_read++;
257 
258    if (r)
259       m_result = false;
260 }
261 
emit_alu_op(const AluInstr & ai)262 void AssamblerVisitor::emit_alu_op(const AluInstr& ai)
263 {
264    struct r600_bytecode_alu alu;
265    memset(&alu, 0, sizeof(alu));
266 
267    if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
268       std::cerr << "Opcode not handled for " << ai <<"\n";
269       m_result = false;
270       return;
271    }
272 
273    // skip multiple barriers
274    if (m_last_op_was_barrier && ai.opcode() == op0_group_barrier)
275       return;
276 
277    m_last_op_was_barrier = ai.opcode() == op0_group_barrier;
278 
279    alu.op = opcode_map.at(ai.opcode());
280 
281    auto dst = ai.dest();
282    if (dst) {
283       if (!copy_dst(alu.dst, *dst, ai.has_alu_flag(alu_write))) {
284          m_result = false;
285          return;
286       }
287 
288       alu.dst.write = ai.has_alu_flag(alu_write);
289       alu.dst.clamp = ai.has_alu_flag(alu_dst_clamp);
290       alu.dst.rel = dst->addr() ? 1 : 0;
291    } else {
292       alu.dst.chan = ai.dest_chan();
293    }
294 
295    alu.is_op3 = ai.n_sources() == 3;
296 
297    EBufferIndexMode kcache_index_mode = bim_none;
298    PVirtualValue buffer_offset = nullptr;
299 
300    for (unsigned i = 0; i < ai.n_sources(); ++i) {
301       buffer_offset = copy_src(alu.src[i], ai.src(i));
302       alu.src[i].neg = ai.has_alu_flag(AluInstr::src_neg_flags[i]);
303       if (!alu.is_op3)
304          alu.src[i].abs = ai.has_alu_flag(AluInstr::src_abs_flags[i]);
305 
306       if (buffer_offset && kcache_index_mode == bim_none) {
307          kcache_index_mode = bim_zero;
308          alu.src[i].kc_bank = 1;
309          alu.src[i].kc_rel = 1;
310       }
311 
312       if (ai.has_lds_queue_read()) {
313          assert(m_bc->cf_last->nlds_read > 0);
314          m_bc->cf_last->nlds_read--;
315       }
316    }
317 
318    if (ai.bank_swizzle() != alu_vec_unknown)
319       alu.bank_swizzle_force = ai.bank_swizzle();
320 
321    alu.last = ai.has_alu_flag(alu_last_instr);
322    alu.execute_mask = ai.has_alu_flag(alu_update_exec);
323 
324    /* If the destination register is equal to the last loaded address register
325     * then clear the latter one, because the values will no longer be identical */
326    if (m_last_addr)
327       sfn_log << SfnLog::assembly << "  Current address register is " << *m_last_addr << "\n";
328 
329    if (dst)
330       sfn_log << SfnLog::assembly << "  Current dst register is " << *dst << "\n";
331 
332    if (dst && m_last_addr && *dst == *m_last_addr) {
333       sfn_log << SfnLog::assembly << "  Clear address register (was " << *m_last_addr << "\n";
334       m_last_addr = nullptr;
335    }
336 
337    auto cf_op = ai.cf_type();
338 
339    unsigned type = 0;
340    switch (cf_op) {
341    case cf_alu: type = CF_OP_ALU; break;
342    case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
343    case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
344    case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
345    case cf_alu_break: type = CF_OP_ALU_BREAK; break;
346    case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
347    case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
348    case cf_alu_extended: type = CF_OP_ALU_EXT; break;
349    default:
350       assert(0 && "cf_alu_undefined should have been replaced");
351    }
352 
353    if (alu.last)
354       m_nliterals_in_group.clear();
355 
356 
357    m_result = !r600_bytecode_add_alu_type(m_bc, &alu, type);
358 
359    if (ai.opcode() == op1_mova_int)
360       m_bc->ar_loaded = 0;
361 
362    if (ai.opcode() == op1_set_cf_idx0)
363       m_bc->index_loaded[0] = 1;
364 
365    if (ai.opcode() == op1_set_cf_idx1)
366       m_bc->index_loaded[1] = 1;
367 
368    m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
369                           ai.opcode() == op2_killne_int ||
370                           ai.opcode() == op1_set_cf_idx0 ||
371                           ai.opcode() == op1_set_cf_idx1);
372 }
373 
visit(const AluGroup & group)374 void AssamblerVisitor::visit(const AluGroup& group)
375 {
376    clear_states(sf_vtx | sf_tex);
377 
378    if (group.slots() == 0)
379       return;
380 
381    if (group.has_lds_group_start()) {
382       if (m_bc->cf_last->ndw + 2 * (*group.begin())->required_slots() > 220) {
383          assert(m_bc->cf_last->nlds_read == 0);
384          m_bc->force_add_cf = 1;
385          m_last_addr = nullptr;
386       }
387    } else if (m_bc->cf_last) {
388       if (m_bc->cf_last->ndw + 2 * group.slots() > 240) {
389          assert(m_bc->cf_last->nlds_read == 0);
390          m_bc->force_add_cf = 1;
391          m_last_addr = nullptr;
392       } else {
393          auto instr = *group.begin();
394          if (instr &&
395              !instr->has_alu_flag(alu_is_lds) &&
396              instr->opcode() == op0_group_barrier &&
397              m_bc->cf_last->ndw + 14 > 240) {
398             assert(m_bc->cf_last->nlds_read == 0);
399             m_bc->force_add_cf = 1;
400             m_last_addr = nullptr;
401          }
402       }
403    }
404 
405    auto addr = group.addr();
406 
407    if (addr.first) {
408       if (!addr.second) {
409          if (!m_last_addr || !m_bc->ar_loaded ||
410              !m_last_addr->equal_to(*addr.first)) {
411             m_bc->ar_reg = addr.first->sel();
412             m_bc->ar_chan = addr.first->chan();
413             m_last_addr = addr.first;
414             m_bc->ar_loaded = 0;
415 
416             r600_load_ar(m_bc, group.addr_for_src());
417          }
418       } else {
419          emit_index_reg(*addr.first, 0);
420       }
421    }
422 
423    for (auto& i : group) {
424       if (i)
425          i->accept(*this);
426    }
427 }
428 
visit(const TexInstr & tex_instr)429 void AssamblerVisitor::visit(const TexInstr& tex_instr)
430 {
431    clear_states(sf_vtx | sf_alu);
432 
433    int sampler_offset = 0;
434    auto addr = tex_instr.sampler_offset();
435    EBufferIndexMode index_mode = bim_none;
436 
437    if (addr)
438       index_mode = emit_index_reg(*addr, 1);
439 
440    if (tex_fetch_results.find(tex_instr.src().sel()) !=
441        tex_fetch_results.end()) {
442       m_bc->force_add_cf = 1;
443       tex_fetch_results.clear();
444    }
445 
446    r600_bytecode_tex tex;
447    memset(&tex, 0, sizeof(struct r600_bytecode_tex));
448    tex.op = tex_instr.opcode();
449    tex.sampler_id = tex_instr.sampler_id() + sampler_offset;
450    tex.resource_id = tex_instr.resource_id() + sampler_offset;
451    tex.src_gpr = tex_instr.src().sel();
452    tex.dst_gpr = tex_instr.dst().sel();
453    tex.dst_sel_x = tex_instr.dest_swizzle(0);
454    tex.dst_sel_y = tex_instr.dest_swizzle(1);
455    tex.dst_sel_z = tex_instr.dest_swizzle(2);
456    tex.dst_sel_w = tex_instr.dest_swizzle(3);
457    tex.src_sel_x = tex_instr.src()[0]->chan();
458    tex.src_sel_y = tex_instr.src()[1]->chan();
459    tex.src_sel_z = tex_instr.src()[2]->chan();
460    tex.src_sel_w = tex_instr.src()[3]->chan();
461    tex.coord_type_x = !tex_instr.has_tex_flag(TexInstr::x_unnormalized);
462    tex.coord_type_y = !tex_instr.has_tex_flag(TexInstr::y_unnormalized);
463    tex.coord_type_z = !tex_instr.has_tex_flag(TexInstr::z_unnormalized);
464    tex.coord_type_w = !tex_instr.has_tex_flag(TexInstr::w_unnormalized);
465    tex.offset_x = tex_instr.get_offset(0);
466    tex.offset_y = tex_instr.get_offset(1);
467    tex.offset_z = tex_instr.get_offset(2);
468    tex.resource_index_mode = index_mode;
469    tex.sampler_index_mode = index_mode;
470 
471    if (tex.dst_sel_x < 4 &&
472        tex.dst_sel_y < 4 &&
473        tex.dst_sel_z < 4 &&
474        tex.dst_sel_w < 4)
475       tex_fetch_results.insert(tex.dst_gpr);
476 
477    if (tex_instr.opcode() == TexInstr::get_gradient_h ||
478        tex_instr.opcode() == TexInstr::get_gradient_v)
479       tex.inst_mod = tex_instr.has_tex_flag(TexInstr::grad_fine) ? 1 : 0;
480    else
481       tex.inst_mod = tex_instr.inst_mode();
482    if (r600_bytecode_add_tex(m_bc, &tex)) {
483       R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
484       m_result = false;
485    }
486 }
487 
visit(const ExportInstr & exi)488 void AssamblerVisitor::visit(const ExportInstr& exi)
489 {
490    const auto& value = exi.value();
491 
492    r600_bytecode_output output;
493    memset(&output, 0, sizeof(output));
494 
495    output.gpr = value.sel();
496    output.elem_size = 3;
497    output.swizzle_x = value[0]->chan();
498    output.swizzle_y = value[1]->chan();
499    output.swizzle_z = value[2]->chan();
500    output.burst_count = 1;
501    output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
502    output.type = exi.export_type();
503 
504 
505    clear_states(sf_all);
506    switch (exi.export_type()) {
507    case ExportInstr::pixel:
508       output.swizzle_w = ps_alpha_to_one ? 5 : exi.value()[3]->chan();
509       output.array_base = exi.location();
510    break;
511    case ExportInstr::pos:
512       output.swizzle_w = exi.value()[3]->chan();
513       output.array_base = 60 + exi.location();
514    break;
515    case ExportInstr::param:
516       output.swizzle_w = exi.value()[3]->chan();
517       output.array_base = exi.location();
518    break;
519    default:
520       R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type());
521       m_result = false;
522    }
523 
524    /* If all register elements pinned to fixed values
525     * we can override the gpr (the register allocator doesn't see
526     * this because it doesn't take these channels into account. */
527    if (output.swizzle_x > 3 && output.swizzle_y > 3 &&
528        output.swizzle_z > 3 && output.swizzle_w > 3)
529        output.gpr = 0;
530 
531    int r = 0;
532    if ((r =r600_bytecode_add_output(m_bc, &output))) {
533       R600_ERR("Error adding export at location %d : err: %d\n", exi.location(), r);
534       m_result = false;
535    }
536 }
537 
visit(const ScratchIOInstr & instr)538 void AssamblerVisitor::visit(const ScratchIOInstr& instr)
539 {
540    clear_states(sf_all);
541 
542    struct r600_bytecode_output cf;
543 
544    memset(&cf, 0, sizeof(struct r600_bytecode_output));
545 
546    cf.op = CF_OP_MEM_SCRATCH;
547    cf.elem_size = 3;
548    cf.gpr = instr.value().sel();
549    cf.mark = !instr.is_read();
550    cf.comp_mask = instr.is_read() ? 0xf : instr.write_mask();
551    cf.swizzle_x = 0;
552    cf.swizzle_y = 1;
553    cf.swizzle_z = 2;
554    cf.swizzle_w = 3;
555    cf.burst_count = 1;
556 
557    assert(!instr.is_read() || m_bc->gfx_level < R700);
558 
559    if (instr.address()) {
560       cf.type = instr.is_read() || m_bc->gfx_level > R600 ? 3 : 1;
561       cf.index_gpr = instr.address()->sel();
562 
563       /* The docu seems to be wrong here: In indirect addressing the
564        * address_base seems to be the array_size */
565       cf.array_size = instr.array_size();
566    } else {
567       cf.type = instr.is_read() || m_bc->gfx_level > R600 ? 2 : 0;
568       cf.array_base = instr.location();
569    }
570 
571    if (r600_bytecode_add_output(m_bc, &cf)){
572       R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
573       m_result = false;
574    }
575 }
576 
visit(const StreamOutInstr & instr)577 void AssamblerVisitor::visit(const StreamOutInstr& instr)
578 {
579    struct r600_bytecode_output output;
580    memset(&output, 0, sizeof(struct r600_bytecode_output));
581 
582    output.gpr = instr.value().sel();
583    output.elem_size = instr.element_size();
584    output.array_base = instr.array_base();
585    output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
586    output.burst_count = instr.burst_count();
587    output.array_size = instr.array_size();
588    output.comp_mask = instr.comp_mask();
589    output.op = instr.op(m_shader->bc.gfx_level);
590 
591 
592    if (r600_bytecode_add_output(m_bc, &output))  {
593       R600_ERR("shader_from_nir: Error creating stream output instruction\n");
594       m_result = false;
595    }
596 }
597 
visit(const MemRingOutInstr & instr)598 void AssamblerVisitor::visit(const MemRingOutInstr& instr)
599 {
600    struct r600_bytecode_output output;
601    memset(&output, 0, sizeof(struct r600_bytecode_output));
602 
603    output.gpr = instr.value().sel();
604    output.type = instr.type();
605    output.elem_size = 3;
606    output.comp_mask = 0xf;
607    output.burst_count = 1;
608    output.op = instr.op();
609    if (instr.type() == MemRingOutInstr::mem_write_ind ||
610        instr.type() == MemRingOutInstr::mem_write_ind_ack) {
611       output.index_gpr = instr.index_reg();
612       output.array_size = 0xfff;
613    }
614    output.array_base = instr.array_base();
615 
616    if (r600_bytecode_add_output(m_bc, &output)) {
617       R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
618       m_result = false;
619    }
620 }
621 
visit(const EmitVertexInstr & instr)622 void AssamblerVisitor::visit(const EmitVertexInstr& instr)
623 {
624    int r = r600_bytecode_add_cfinst(m_bc, instr.op());
625    if (!r)
626       m_bc->cf_last->count = instr.stream();
627    else
628       m_result = false;
629    assert(m_bc->cf_last->count < 4);
630 }
631 
visit(const FetchInstr & fetch_instr)632 void AssamblerVisitor::visit(const FetchInstr& fetch_instr)
633 {
634    clear_states(sf_tex | sf_alu);
635 
636    auto buffer_offset = fetch_instr.resource_offset();
637    EBufferIndexMode rat_index_mode = bim_none;
638 
639    if (buffer_offset)
640       rat_index_mode = emit_index_reg(*buffer_offset, 0);
641 
642    if (fetch_instr.has_fetch_flag(FetchInstr::wait_ack))
643       emit_wait_ack();
644 
645    bool use_tc = fetch_instr.has_fetch_flag(FetchInstr::use_tc) ||
646                  (m_bc->gfx_level == CAYMAN);
647    if (!use_tc &&
648        vtx_fetch_results.find(fetch_instr.src().sel()) !=
649        vtx_fetch_results.end()) {
650       m_bc->force_add_cf = 1;
651       vtx_fetch_results.clear();
652    }
653 
654    if (fetch_instr.has_fetch_flag(FetchInstr::use_tc) &&
655        tex_fetch_results.find(fetch_instr.src().sel()) !=
656        tex_fetch_results.end()) {
657       m_bc->force_add_cf = 1;
658       tex_fetch_results.clear();
659    }
660 
661    if (use_tc)
662       tex_fetch_results.insert(fetch_instr.dst().sel());
663    else
664       vtx_fetch_results.insert(fetch_instr.dst().sel());
665 
666    struct r600_bytecode_vtx vtx;
667    memset(&vtx, 0, sizeof(vtx));
668    vtx.op = fetch_instr.opcode();
669    vtx.buffer_id = fetch_instr.resource_id();
670    vtx.fetch_type = fetch_instr.fetch_type();
671    vtx.src_gpr = fetch_instr.src().sel();
672    vtx.src_sel_x = fetch_instr.src().chan();
673    vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
674    vtx.dst_gpr = fetch_instr.dst().sel();
675    vtx.dst_sel_x = fetch_instr.dest_swizzle(0);		/* SEL_X */
676    vtx.dst_sel_y = fetch_instr.dest_swizzle(1);		/* SEL_Y */
677    vtx.dst_sel_z = fetch_instr.dest_swizzle(2);		/* SEL_Z */
678    vtx.dst_sel_w = fetch_instr.dest_swizzle(3);		/* SEL_W */
679    vtx.use_const_fields = fetch_instr.has_fetch_flag(FetchInstr::use_const_field);
680    vtx.data_format = fetch_instr.data_format();
681    vtx.num_format_all = fetch_instr.num_format();		/* NUM_FORMAT_SCALED */
682    vtx.format_comp_all = fetch_instr.has_fetch_flag(FetchInstr::format_comp_signed);
683    vtx.endian = fetch_instr.endian_swap();
684    vtx.buffer_index_mode = rat_index_mode;
685    vtx.offset = fetch_instr.src_offset();
686    vtx.indexed = fetch_instr.has_fetch_flag(FetchInstr::indexed);
687    vtx.uncached = fetch_instr.has_fetch_flag(FetchInstr::uncached);
688    vtx.elem_size = fetch_instr.elm_size();
689    vtx.array_base = fetch_instr.array_base();
690    vtx.array_size = fetch_instr.array_size();
691    vtx.srf_mode_all = fetch_instr.has_fetch_flag(FetchInstr::srf_mode);
692 
693    if (fetch_instr.has_fetch_flag(FetchInstr::use_tc)) {
694       if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
695          R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
696          m_result = false;
697       }
698 
699    } else {
700       if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
701          R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
702          m_result = false;
703       }
704    }
705 
706    m_bc->cf_last->vpm = (m_bc->type == PIPE_SHADER_FRAGMENT) &&
707          fetch_instr.has_fetch_flag(FetchInstr::vpm);
708    m_bc->cf_last->barrier = 1;
709 }
710 
visit(const WriteTFInstr & instr)711 void AssamblerVisitor::visit(const WriteTFInstr& instr)
712 {
713    struct r600_bytecode_gds gds;
714 
715    auto& value = instr.value();
716 
717    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
718    gds.src_gpr = value.sel();
719    gds.src_sel_x = value[0]->chan();
720    gds.src_sel_y = value[1]->chan();
721    gds.src_sel_z = 4;
722    gds.dst_sel_x = 7;
723    gds.dst_sel_y = 7;
724    gds.dst_sel_z = 7;
725    gds.dst_sel_w = 7;
726    gds.op = FETCH_OP_TF_WRITE;
727 
728    if (r600_bytecode_add_gds(m_bc, &gds) != 0) {
729       m_result = false;
730       return;
731    }
732 
733    if (value[2]->chan() != 7) {
734       memset(&gds, 0, sizeof(struct r600_bytecode_gds));
735       gds.src_gpr = value.sel();
736       gds.src_sel_x = value[2]->chan();
737       gds.src_sel_y = value[3]->chan();
738       gds.src_sel_z = 4;
739       gds.dst_sel_x = 7;
740       gds.dst_sel_y = 7;
741       gds.dst_sel_z = 7;
742       gds.dst_sel_w = 7;
743       gds.op = FETCH_OP_TF_WRITE;
744 
745       if (r600_bytecode_add_gds(m_bc, &gds)) {
746          m_result = false;
747          return;
748       }
749    }
750 }
751 
visit(const RatInstr & instr)752 void AssamblerVisitor::visit(const RatInstr& instr)
753 {
754    struct r600_bytecode_gds gds;
755 
756    /* The instruction writes to the retuen buffer loaction, and
757     * the value will actually be read bach, so make sure all previous writes
758     * have been finished */
759    if (m_ack_suggested /*&& instr.has_instr_flag(Instr::ack_rat_return_write)*/)
760       emit_wait_ack();
761 
762    int rat_idx = instr.rat_id();
763    EBufferIndexMode rat_index_mode = bim_none;
764    auto addr = instr.rat_id_offset();
765 
766    if (addr)
767       rat_index_mode = emit_index_reg(*addr, 1);
768 
769    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
770 
771    r600_bytecode_add_cfinst(m_bc, instr.cf_opcode());
772    auto cf = m_bc->cf_last;
773    cf->rat.id = rat_idx + m_shader->rat_base;
774    cf->rat.inst = instr.rat_op();
775    cf->rat.index_mode = rat_index_mode;
776    cf->output.type = instr.need_ack() ? 3 : 1;
777    cf->output.gpr = instr.data_gpr();
778    cf->output.index_gpr = instr.index_gpr();
779    cf->output.comp_mask = instr.comp_mask();
780    cf->output.burst_count = instr.burst_count();
781    assert(instr.data_swz(0) == PIPE_SWIZZLE_X);
782    if (cf->rat.inst != RatInstr::STORE_TYPED) {
783       assert(instr.data_swz(1) == PIPE_SWIZZLE_Y ||
784              instr.data_swz(1) == PIPE_SWIZZLE_MAX) ;
785       assert(instr.data_swz(2) == PIPE_SWIZZLE_Z ||
786              instr.data_swz(2) == PIPE_SWIZZLE_MAX) ;
787    }
788 
789    cf->vpm = m_bc->type == PIPE_SHADER_FRAGMENT;
790    cf->barrier = 1;
791    cf->mark = instr.need_ack();
792    cf->output.elem_size = instr.elm_size();
793 
794    m_ack_suggested |= instr.need_ack();
795 }
796 
797 
clear_states(const uint32_t & states)798 void AssamblerVisitor::clear_states(const uint32_t& states)
799 {
800    if (states & sf_vtx)
801       vtx_fetch_results.clear();
802 
803    if (states & sf_tex)
804       tex_fetch_results.clear();
805 
806    if (states & sf_alu) {
807       m_last_op_was_barrier = false;
808       m_last_addr = nullptr;
809    }
810 
811 }
812 
813 
visit(const Block & block)814 void AssamblerVisitor::visit(const Block& block)
815 {
816    if (block.empty())
817       return;
818 
819    m_bc->force_add_cf = block.has_instr_flag(Instr::force_cf);
820    sfn_log << SfnLog::assembly << "Translate block  size: " << block.size() << " new_cf:" << m_bc->force_add_cf << "\n";
821 
822    for (const auto& i : block) {
823       sfn_log << SfnLog::assembly << "Translate " << *i << " ";
824       i->accept(*this);
825       sfn_log << SfnLog::assembly << (m_result ? "good" : "fail") << "\n";
826 
827       if (!m_result)
828          break;
829    }
830 }
831 
visit(const IfInstr & instr)832 void AssamblerVisitor::visit(const IfInstr& instr)
833 {
834    int elems = m_callstack.push(FC_PUSH_VPM);
835    bool needs_workaround = false;
836 
837    if (m_bc->gfx_level == CAYMAN && m_bc->stack.loop > 1)
838       needs_workaround = true;
839 
840    if (m_bc->gfx_level == EVERGREEN &&
841        m_bc->family != CHIP_HEMLOCK &&
842        m_bc->family != CHIP_CYPRESS &&
843        m_bc->family != CHIP_JUNIPER) {
844       unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
845       unsigned dmod2 = (elems) % m_bc->stack.entry_size;
846 
847       if (elems && (!dmod1 || !dmod2))
848          needs_workaround = true;
849    }
850 
851    auto pred = instr.predicate();
852    auto [addr, dummy0, dummy1 ] = pred->indirect_addr(); {}
853    if (addr) {
854       if (!m_last_addr || !m_bc->ar_loaded ||
855           !m_last_addr->equal_to(*addr)) {
856          m_bc->ar_reg = addr->sel();
857             m_bc->ar_chan = addr->chan();
858             m_last_addr = addr;
859             m_bc->ar_loaded = 0;
860 
861             r600_load_ar(m_bc, true);
862       }
863    }
864 
865    if (needs_workaround) {
866       r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
867       m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
868       pred->set_cf_type(cf_alu);
869    }
870 
871    clear_states(sf_tex|sf_vtx);
872    pred->accept(*this);
873 
874    r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
875    clear_states(sf_all);
876 
877    m_jump_tracker.push(m_bc->cf_last, jt_if);
878 }
879 
visit(const ControlFlowInstr & instr)880 void AssamblerVisitor::visit(const ControlFlowInstr& instr)
881 {
882    clear_states(sf_all);
883    switch (instr.cf_type()) {
884    case ControlFlowInstr::cf_else:
885       emit_else();
886       break;
887    case ControlFlowInstr::cf_endif:
888       emit_endif();
889       break;
890    case ControlFlowInstr::cf_loop_begin:
891       emit_loop_begin(instr.has_instr_flag(Instr::vpm));
892       break;
893    case ControlFlowInstr::cf_loop_end:
894       emit_loop_end();
895       break;
896    case ControlFlowInstr::cf_loop_break:
897       emit_loop_break();
898       break;
899    case ControlFlowInstr::cf_loop_continue:
900       emit_loop_cont();
901       break;
902    case ControlFlowInstr::cf_wait_ack:
903    {
904       int r = r600_bytecode_add_cfinst(m_bc, CF_OP_WAIT_ACK);
905       if (!r) {
906          m_bc->cf_last->cf_addr = 0;
907          m_bc->cf_last->barrier = 1;
908          m_ack_suggested = false;
909       } else {
910          m_result = false;
911       }
912    }
913       break;
914    default:
915       unreachable("Unknown CF instruction type");
916    }
917 }
918 
visit(const GDSInstr & instr)919 void AssamblerVisitor::visit(const GDSInstr& instr)
920 {
921    struct r600_bytecode_gds gds;
922 
923    bool indirect = false;
924    auto addr = instr.uav_id();
925 
926    if (addr) {
927       indirect = true;
928       emit_index_reg(*addr, 1);
929    }
930 
931    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
932 
933    gds.op = ds_opcode_map.at(instr.opcode());
934    gds.dst_gpr = instr.dest()->sel();
935    gds.uav_id = instr.uav_base();
936    gds.uav_index_mode = indirect ? bim_one : bim_none;
937    gds.src_gpr = instr.src().sel();
938 
939    gds.src_sel_x = instr.src()[0]->chan() < 7 ? instr.src()[0]->chan() : 4;
940    gds.src_sel_y = instr.src()[1]->chan();
941    gds.src_sel_z = instr.src()[2]->chan() < 7 ? instr.src()[2]->chan() : 4;
942 
943    gds.dst_sel_x = 7;
944    gds.dst_sel_y = 7;
945    gds.dst_sel_z = 7;
946    gds.dst_sel_w = 7;
947 
948    switch (instr.dest()->chan()) {
949    case 0: gds.dst_sel_x = 0;break;
950    case 1: gds.dst_sel_y = 0;break;
951    case 2: gds.dst_sel_z = 0;break;
952    case 3: gds.dst_sel_w = 0;
953    }
954 
955    gds.src_gpr2 = 0;
956    gds.alloc_consume = m_bc->gfx_level < CAYMAN ? 1 : 0; // Not Cayman
957 
958    int r = r600_bytecode_add_gds(m_bc, &gds);
959    if (r) {
960       m_result = false;
961       return;
962    }
963    m_bc->cf_last->vpm = PIPE_SHADER_FRAGMENT == m_bc->type;
964    m_bc->cf_last->barrier = 1;
965 }
966 
visit(const LDSAtomicInstr & instr)967 void AssamblerVisitor::visit(const LDSAtomicInstr& instr)
968 {
969    (void)instr;
970    unreachable("LDSAtomicInstr must be lowered to ALUInstr");
971 }
972 
visit(const LDSReadInstr & instr)973 void AssamblerVisitor::visit(const LDSReadInstr& instr)
974 {
975    (void)instr;
976    unreachable("LDSReadInstr must be lowered to ALUInstr");
977 }
978 
979 EBufferIndexMode
emit_index_reg(const VirtualValue & addr,unsigned idx)980 AssamblerVisitor::emit_index_reg(const VirtualValue& addr, unsigned idx)
981 {
982    assert(idx < 2);
983 
984    if (!m_bc->index_loaded[idx] || m_loop_nesting ||
985        m_bc->index_reg[idx] != (unsigned)addr.sel()
986        ||  m_bc->index_reg_chan[idx] != (unsigned)addr.chan()) {
987       struct r600_bytecode_alu alu;
988 
989       // Make sure MOVA is not last instr in clause
990 
991       if (!m_bc->cf_last || (m_bc->cf_last->ndw>>1) >= 110)
992          m_bc->force_add_cf = 1;
993 
994       if (m_bc->gfx_level != CAYMAN) {
995 
996          EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0;
997 
998          memset(&alu, 0, sizeof(alu));
999          alu.op = opcode_map.at(op1_mova_int);
1000          alu.dst.chan = 0;
1001          alu.src[0].sel = addr.sel();
1002          alu.src[0].chan = addr.chan();
1003          alu.last = 1;
1004          sfn_log << SfnLog::assembly << "   mova_int, ";
1005          int r = r600_bytecode_add_alu(m_bc, &alu);
1006          if (r)
1007             return bim_invalid;
1008 
1009          alu.op = opcode_map.at(idxop);
1010          alu.dst.chan = 0;
1011          alu.src[0].sel = 0;
1012          alu.src[0].chan = 0;
1013          alu.last = 1;
1014          sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx;
1015          r = r600_bytecode_add_alu(m_bc, &alu);
1016          if (r)
1017             return bim_invalid;
1018       } else {
1019          memset(&alu, 0, sizeof(alu));
1020          alu.op = opcode_map.at(op1_mova_int);
1021          alu.dst.sel = idx == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
1022          alu.dst.chan = 0;
1023          alu.src[0].sel = addr.sel();
1024          alu.src[0].chan = addr.chan();
1025          alu.last = 1;
1026          sfn_log << SfnLog::assembly << "   mova_int, ";
1027          int r = r600_bytecode_add_alu(m_bc, &alu);
1028          if (r)
1029             return bim_invalid;
1030       }
1031 
1032       m_bc->ar_loaded = 0;
1033       m_bc->index_reg[idx] = addr.sel();
1034       m_bc->index_reg_chan[idx] = addr.chan();
1035       m_bc->index_loaded[idx] = true;
1036       m_bc->force_add_cf = 1;
1037       sfn_log << SfnLog::assembly << "\n";
1038    }
1039    return idx == 0 ? bim_zero : bim_one;
1040 }
1041 
emit_else()1042 void AssamblerVisitor::emit_else()
1043 {
1044    r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
1045    m_bc->cf_last->pop_count = 1;
1046    m_result &= m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
1047 }
1048 
emit_endif()1049 void AssamblerVisitor::emit_endif()
1050 {
1051    m_callstack.pop(FC_PUSH_VPM);
1052 
1053    unsigned force_pop = m_bc->force_add_cf;
1054    if (!force_pop) {
1055       int alu_pop = 3;
1056       if (m_bc->cf_last) {
1057          if (m_bc->cf_last->op == CF_OP_ALU)
1058             alu_pop = 0;
1059          else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
1060             alu_pop = 1;
1061       }
1062       alu_pop += 1;
1063       if (alu_pop == 1) {
1064          m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
1065          m_bc->force_add_cf = 1;
1066       } else {
1067          force_pop = 1;
1068       }
1069    }
1070 
1071    if (force_pop) {
1072       r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
1073       m_bc->cf_last->pop_count = 1;
1074       m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
1075    }
1076 
1077    m_result &= m_jump_tracker.pop(m_bc->cf_last, jt_if);
1078 }
1079 
emit_loop_begin(bool vpm)1080 void AssamblerVisitor::emit_loop_begin(bool vpm)
1081 {
1082    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
1083    m_bc->cf_last->vpm = vpm && m_bc->type == PIPE_SHADER_FRAGMENT;
1084    m_jump_tracker.push(m_bc->cf_last, jt_loop);
1085    m_callstack.push(FC_LOOP);
1086    ++m_loop_nesting;
1087 }
1088 
emit_loop_end()1089 void AssamblerVisitor::emit_loop_end()
1090 {
1091    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
1092    m_callstack.pop(FC_LOOP);
1093    assert(m_loop_nesting);
1094    --m_loop_nesting;
1095    m_result |= m_jump_tracker.pop(m_bc->cf_last, jt_loop);
1096 }
1097 
emit_loop_break()1098 void AssamblerVisitor::emit_loop_break()
1099 {
1100    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
1101    m_result |= m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
1102 }
1103 
emit_loop_cont()1104 void AssamblerVisitor::emit_loop_cont()
1105 {
1106    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
1107    m_result |= m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
1108 }
1109 
copy_dst(r600_bytecode_alu_dst & dst,const Register & d,bool write)1110 bool AssamblerVisitor::copy_dst(r600_bytecode_alu_dst& dst,
1111                                 const Register& d, bool write)
1112 {
1113    if (write && d.sel() > 124) {
1114       R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n",
1115                d.sel());
1116       m_result = false;
1117       return false;
1118    }
1119 
1120    dst.sel = d.sel();
1121    dst.chan = d.chan();
1122 
1123    if (m_bc->index_reg[1] == dst.sel &&
1124        m_bc->index_reg_chan[1] == dst.chan)
1125       m_bc->index_loaded[1] = false;
1126 
1127    if (m_bc->index_reg[0] == dst.sel &&
1128        m_bc->index_reg_chan[0] == dst.chan)
1129       m_bc->index_loaded[0] = false;
1130 
1131    return true;
1132 }
1133 
emit_wait_ack()1134 void AssamblerVisitor::emit_wait_ack()
1135 {
1136    int r = r600_bytecode_add_cfinst(m_bc, CF_OP_WAIT_ACK);
1137    if (!r) {
1138       m_bc->cf_last->cf_addr = 0;
1139       m_bc->cf_last->barrier = 1;
1140       m_ack_suggested = false;
1141    } else
1142       m_result = false;
1143 }
1144 
1145 class EncodeSourceVisitor : public ConstRegisterVisitor {
1146 public:
1147 
1148    EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode *bc);
1149    void visit(const Register& value) override;
1150    void visit(const LocalArray& value) override;
1151    void visit(const LocalArrayValue& value) override;
1152    void visit(const UniformValue& value) override;
1153    void visit(const LiteralConstant& value) override;
1154    void visit(const InlineConstant& value) override;
1155 
1156    r600_bytecode_alu_src& src;
1157    r600_bytecode *m_bc;
1158    PVirtualValue m_buffer_offset{nullptr};
1159 };
1160 
copy_src(r600_bytecode_alu_src & src,const VirtualValue & s)1161 PVirtualValue AssamblerVisitor::copy_src(r600_bytecode_alu_src& src, const VirtualValue& s)
1162 {
1163 
1164    EncodeSourceVisitor visitor(src, m_bc);
1165    src.sel = s.sel();
1166    src.chan = s.chan();
1167 
1168    s.accept(visitor);
1169    return visitor.m_buffer_offset;
1170 }
1171 
EncodeSourceVisitor(r600_bytecode_alu_src & s,r600_bytecode * bc)1172 EncodeSourceVisitor::EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode *bc):
1173    src(s), m_bc(bc)
1174 {
1175 }
1176 
visit(const Register & value)1177 void EncodeSourceVisitor::visit(const Register& value)
1178 {
1179    assert(value.sel() <= 124 && "Only have 124 registers");
1180 }
1181 
visit(const LocalArray & value)1182 void EncodeSourceVisitor::visit(const LocalArray& value)
1183 {
1184    (void)value;
1185    unreachable("An array can't be a source register");
1186 }
1187 
visit(const LocalArrayValue & value)1188 void EncodeSourceVisitor::visit(const LocalArrayValue& value)
1189 {
1190    src.rel = value.addr() ? 1 : 0;
1191 }
1192 
visit(const UniformValue & value)1193 void EncodeSourceVisitor::visit(const UniformValue& value)
1194 {
1195    assert(value.sel() >= 512 && "Uniform values must have a sel >= 512");
1196    m_buffer_offset = value.buf_addr();
1197    src.kc_bank = value.kcache_bank();
1198 }
1199 
visit(const LiteralConstant & value)1200 void EncodeSourceVisitor::visit(const LiteralConstant& value)
1201 {
1202    src.value = value.value();
1203 }
1204 
visit(const InlineConstant & value)1205 void EncodeSourceVisitor::visit(const InlineConstant& value)
1206 {
1207    (void)value;
1208 }
1209 
1210 
1211 
1212 const std::map<EAluOp, int> opcode_map = {
1213 
1214    {op2_add, ALU_OP2_ADD},
1215    {op2_mul, ALU_OP2_MUL},
1216    {op2_mul_ieee, ALU_OP2_MUL_IEEE},
1217    {op2_max, ALU_OP2_MAX},
1218    {op2_min, ALU_OP2_MIN},
1219    {op2_max_dx10, ALU_OP2_MAX_DX10},
1220    {op2_min_dx10, ALU_OP2_MIN_DX10},
1221    {op2_sete, ALU_OP2_SETE},
1222    {op2_setgt, ALU_OP2_SETGT},
1223    {op2_setge, ALU_OP2_SETGE},
1224    {op2_setne, ALU_OP2_SETNE},
1225    {op2_sete_dx10, ALU_OP2_SETE_DX10},
1226    {op2_setgt_dx10, ALU_OP2_SETGT_DX10},
1227    {op2_setge_dx10, ALU_OP2_SETGE_DX10},
1228    {op2_setne_dx10, ALU_OP2_SETNE_DX10},
1229    {op1_fract, ALU_OP1_FRACT},
1230    {op1_trunc, ALU_OP1_TRUNC},
1231    {op1_ceil, ALU_OP1_CEIL},
1232    {op1_rndne, ALU_OP1_RNDNE},
1233    {op1_floor, ALU_OP1_FLOOR},
1234    {op2_ashr_int, ALU_OP2_ASHR_INT},
1235    {op2_lshr_int, ALU_OP2_LSHR_INT},
1236    {op2_lshl_int, ALU_OP2_LSHL_INT},
1237    {op1_mov, ALU_OP1_MOV},
1238    {op0_nop, ALU_OP0_NOP},
1239    {op2_mul_64, ALU_OP2_MUL_64},
1240    {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
1241    {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
1242    {op2_prede_int, ALU_OP2_PRED_SETE_INT},
1243    {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
1244    {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
1245    {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
1246    {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
1247    {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
1248    {op2_pred_sete, ALU_OP2_PRED_SETE},
1249    {op2_pred_setgt, ALU_OP2_PRED_SETGT},
1250    {op2_pred_setge, ALU_OP2_PRED_SETGE},
1251    {op2_pred_setne, ALU_OP2_PRED_SETNE},
1252    {op0_pred_set_clr, ALU_OP0_PRED_SET_CLR},
1253    {op1_pred_set_restore, ALU_OP1_PRED_SET_RESTORE},
1254    {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH},
1255    {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH},
1256    {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH},
1257    {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH},
1258    {op2_kille, ALU_OP2_KILLE},
1259    {op2_killgt, ALU_OP2_KILLGT},
1260    {op2_killge, ALU_OP2_KILLGE},
1261    {op2_killne, ALU_OP2_KILLNE},
1262    {op2_and_int, ALU_OP2_AND_INT},
1263    {op2_or_int, ALU_OP2_OR_INT},
1264    {op2_xor_int, ALU_OP2_XOR_INT},
1265    {op1_not_int, ALU_OP1_NOT_INT},
1266    {op2_add_int, ALU_OP2_ADD_INT},
1267    {op2_sub_int, ALU_OP2_SUB_INT},
1268    {op2_max_int, ALU_OP2_MAX_INT},
1269    {op2_min_int, ALU_OP2_MIN_INT},
1270    {op2_max_uint, ALU_OP2_MAX_UINT},
1271    {op2_min_uint, ALU_OP2_MIN_UINT},
1272    {op2_sete_int, ALU_OP2_SETE_INT},
1273    {op2_setgt_int, ALU_OP2_SETGT_INT},
1274    {op2_setge_int, ALU_OP2_SETGE_INT},
1275    {op2_setne_int, ALU_OP2_SETNE_INT},
1276    {op2_setgt_uint, ALU_OP2_SETGT_UINT},
1277    {op2_setge_uint, ALU_OP2_SETGE_UINT},
1278    {op2_killgt_uint, ALU_OP2_KILLGT_UINT},
1279    {op2_killge_uint, ALU_OP2_KILLGE_UINT},
1280    {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
1281    {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
1282    {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
1283    {op2_kille_int, ALU_OP2_KILLE_INT},
1284    {op2_killgt_int, ALU_OP2_KILLGT_INT},
1285    {op2_killge_int, ALU_OP2_KILLGE_INT},
1286    {op2_killne_int, ALU_OP2_KILLNE_INT},
1287    {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT},
1288    {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT},
1289    {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT},
1290    {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT},
1291    {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT},
1292    {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT},
1293    {op1_flt_to_int, ALU_OP1_FLT_TO_INT},
1294    {op1_bfrev_int, ALU_OP1_BFREV_INT},
1295    {op2_addc_uint, ALU_OP2_ADDC_UINT},
1296    {op2_subb_uint, ALU_OP2_SUBB_UINT},
1297    {op0_group_barrier, ALU_OP0_GROUP_BARRIER},
1298    {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN},
1299    {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END},
1300    {op2_set_mode, ALU_OP2_SET_MODE},
1301    {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0},
1302    {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1},
1303    {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE},
1304    {op1_exp_ieee, ALU_OP1_EXP_IEEE},
1305    {op1_log_clamped, ALU_OP1_LOG_CLAMPED},
1306    {op1_log_ieee, ALU_OP1_LOG_IEEE},
1307    {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED},
1308    {op1_recip_ff, ALU_OP1_RECIP_FF},
1309    {op1_recip_ieee, ALU_OP1_RECIP_IEEE},
1310    {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED},
1311    {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF},
1312    {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE},
1313    {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE},
1314    {op1_sin, ALU_OP1_SIN},
1315    {op1_cos, ALU_OP1_COS},
1316    {op2_mullo_int, ALU_OP2_MULLO_INT},
1317    {op2_mulhi_int, ALU_OP2_MULHI_INT},
1318    {op2_mullo_uint, ALU_OP2_MULLO_UINT},
1319    {op2_mulhi_uint, ALU_OP2_MULHI_UINT},
1320    {op1_recip_int, ALU_OP1_RECIP_INT},
1321    {op1_recip_uint, ALU_OP1_RECIP_UINT},
1322    {op1_recip_64, ALU_OP2_RECIP_64},
1323    {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64},
1324    {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64},
1325    {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64},
1326    {op1_sqrt_64, ALU_OP2_SQRT_64},
1327    {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT},
1328    {op1_int_to_flt, ALU_OP1_INT_TO_FLT},
1329    {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT},
1330    {op2_bfm_int, ALU_OP2_BFM_INT},
1331    {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16},
1332    {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32},
1333    {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT},
1334    {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT},
1335    {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT},
1336    {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT},
1337    {op1_bcnt_int, ALU_OP1_BCNT_INT},
1338    {op1_ffbh_uint, ALU_OP1_FFBH_UINT},
1339    {op1_ffbl_int, ALU_OP1_FFBL_INT},
1340    {op1_ffbh_int, ALU_OP1_FFBH_INT},
1341    {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4},
1342    {op2_dot_ieee, ALU_OP2_DOT_IEEE},
1343    {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI},
1344    {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR},
1345    {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24},
1346    {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT},
1347    {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT},
1348    {op2_mul_uint24, ALU_OP2_MUL_UINT24},
1349    {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT},
1350    {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
1351    {op2_sete_64, ALU_OP2_SETE_64},
1352    {op2_setne_64, ALU_OP2_SETNE_64},
1353    {op2_setgt_64, ALU_OP2_SETGT_64},
1354    {op2_setge_64, ALU_OP2_SETGE_64},
1355    {op2_min_64, ALU_OP2_MIN_64},
1356    {op2_max_64, ALU_OP2_MAX_64},
1357    {op2_dot4, ALU_OP2_DOT4},
1358    {op2_dot4_ieee, ALU_OP2_DOT4_IEEE},
1359    {op2_cube, ALU_OP2_CUBE},
1360    {op1_max4, ALU_OP1_MAX4},
1361    {op1_frexp_64, ALU_OP1_FREXP_64},
1362    {op1_ldexp_64, ALU_OP2_LDEXP_64},
1363    {op1_fract_64, ALU_OP1_FRACT_64},
1364    {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64},
1365    {op2_pred_sete_64, ALU_OP2_PRED_SETE_64},
1366    {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64},
1367    {op2_add_64, ALU_OP2_ADD_64},
1368    {op1_mova_int, ALU_OP1_MOVA_INT},
1369    {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
1370    {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
1371    {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT},
1372    {op2_dot, ALU_OP2_DOT},
1373    {op1_mul_prev, ALU_OP1_MUL_PREV},
1374    {op1_mul_ieee_prev, ALU_OP1_MUL_IEEE_PREV},
1375    {op1_add_prev, ALU_OP1_ADD_PREV},
1376    {op2_muladd_prev, ALU_OP2_MULADD_PREV},
1377    {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV},
1378    {op2_interp_xy, ALU_OP2_INTERP_XY},
1379    {op2_interp_zw, ALU_OP2_INTERP_ZW},
1380    {op2_interp_x, ALU_OP2_INTERP_X},
1381    {op2_interp_z, ALU_OP2_INTERP_Z},
1382    {op0_store_flags, ALU_OP1_STORE_FLAGS},
1383    {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS},
1384    {op0_lds_1a, ALU_OP2_LDS_1A},
1385    {op0_lds_1a1d, ALU_OP2_LDS_1A1D},
1386    {op0_lds_2a, ALU_OP2_LDS_2A},
1387    {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0},
1388    {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10},
1389    {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20},
1390    {op3_bfe_uint, ALU_OP3_BFE_UINT},
1391    {op3_bfe_int, ALU_OP3_BFE_INT},
1392    {op3_bfi_int, ALU_OP3_BFI_INT},
1393    {op3_fma, ALU_OP3_FMA},
1394    {op3_cndne_64, ALU_OP3_CNDNE_64},
1395    {op3_fma_64, ALU_OP3_FMA_64},
1396    {op3_lerp_uint, ALU_OP3_LERP_UINT},
1397    {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT},
1398    {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT},
1399    {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT},
1400    {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT},
1401    {op3_muladd_uint24, ALU_OP3_MULADD_UINT24},
1402    {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP},
1403    {op3_muladd, ALU_OP3_MULADD},
1404    {op3_muladd_m2, ALU_OP3_MULADD_M2},
1405    {op3_muladd_m4, ALU_OP3_MULADD_M4},
1406    {op3_muladd_d2, ALU_OP3_MULADD_D2},
1407    {op3_muladd_ieee, ALU_OP3_MULADD_IEEE},
1408    {op3_cnde, ALU_OP3_CNDE},
1409    {op3_cndgt, ALU_OP3_CNDGT},
1410    {op3_cndge, ALU_OP3_CNDGE},
1411    {op3_cnde_int, ALU_OP3_CNDE_INT},
1412    {op3_cndgt_int, ALU_OP3_CNDGT_INT},
1413    {op3_cndge_int, ALU_OP3_CNDGE_INT},
1414    {op3_mul_lit, ALU_OP3_MUL_LIT},
1415 };
1416 
/* Translation table from the IR data share opcodes (ESDOp) to the
 * FETCH_OP_GDS_* bytecode opcodes. The GDSInstr visitor looks opcodes up
 * here with at(). DS_OP_INVALID maps to 0. */
const std::map<ESDOp, int> ds_opcode_map = {
   {DS_OP_ADD, FETCH_OP_GDS_ADD},
   {DS_OP_SUB, FETCH_OP_GDS_SUB},
   {DS_OP_RSUB, FETCH_OP_GDS_RSUB},
   {DS_OP_INC, FETCH_OP_GDS_INC},
   {DS_OP_DEC, FETCH_OP_GDS_DEC},
   {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT},
   {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT},
   {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT},
   {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT},
   {DS_OP_AND, FETCH_OP_GDS_AND},
   {DS_OP_OR, FETCH_OP_GDS_OR},
   {DS_OP_XOR, FETCH_OP_GDS_XOR},
   {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR},
   {DS_OP_WRITE, FETCH_OP_GDS_WRITE},
   {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL},
   {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2},
   {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE},
   {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF},
   {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE},
   {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE},
   {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET},
   {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET},
   {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET},
   {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET},
   {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET},
   {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET},
   {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET},
   {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET},
   {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET},
   {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET},
   {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET},
   {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET},
   {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET},
   {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET},
   {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET},
   {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET},
   {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET},
   {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET},
   {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET},
   {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET},
   {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET},
   {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET},
   {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET},
   {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET},
   {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET},
   {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET},
   {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
   {DS_OP_INVALID, 0},
};
1467 
1468 }
1469