/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_debug.h"
#include "sfn_shader_vs.h"

#include "sfn_instr_alugroup.h"
#include "sfn_instr_export.h"

#include "tgsi/tgsi_from_mesa.h"


namespace r600 {

uint32_t VertexStageShader::enabled_stream_buffers_mask() const
{
   return m_enabled_stream_buffers_mask;
}

/* Accumulate (OR in) the stream buffer bits used by this stage. */
void VertexStageShader::combine_enabled_stream_buffers_mask(uint32_t mask)
{
   m_enabled_stream_buffers_mask |= mask;
}

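/* store_output() turns a NIR store_output intrinsic into a store_loc record
 * before handing it to the stage specific do_store_output() hook.  Judging
 * by how the fields are used elsewhere in this file, the initializer list
 * maps to: the component offset (frac), the varying slot (location), the
 * driver location (base plus the constant array index from src[1]), and the
 * index of the source operand that carries the data (src[0]). */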
bool VertexExportStage::store_output(nir_intrinsic_instr& intr)
{
   auto index = nir_src_as_const_value(intr.src[1]);
   assert(index && "Indirect outputs not supported");

   const store_loc store_info = {
      nir_intrinsic_component(&intr),
      nir_intrinsic_io_semantics(&intr).location,
      (unsigned)nir_intrinsic_base(&intr) + index->u32,
      0
   };

   return do_store_output(store_info, intr);
}

VertexExportStage::VertexExportStage(VertexStageShader *parent):
   m_parent(parent)
{
}

VertexExportForFs::VertexExportForFs(VertexStageShader *parent,
                                     const pipe_stream_output_info *so_info,
                                     const r600_shader_key& key):
   VertexExportStage(parent),
   m_vs_as_gs_a(key.vs.as_gs_a),
   m_vs_prim_id_out(key.vs.prim_id_out),
   m_so_info(so_info)
{
}

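/* Dispatch for the FS-facing export path.  As the cases below show,
 * position-like slots (POS, PSIZ, EDGE) leave through position exports
 * only, generic varyings through parameter exports only, and a few slots
 * (VIEWPORT, LAYER, VIEW_INDEX, and the clip distances unless flagged as
 * no_varying) are emitted through both so that a fragment shader can still
 * read them.  CLIP_VERTEX is handled separately: emit_clip_vertices()
 * records the full vec4 and enables all clip distance bits. */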
bool VertexExportForFs::do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr)
{
   switch (store_info.location) {

   case VARYING_SLOT_PSIZ:
      m_writes_point_size = true;
      FALLTHROUGH;
   case VARYING_SLOT_POS:
      return emit_varying_pos(store_info, intr);
   case VARYING_SLOT_EDGE: {
      std::array<uint8_t, 4> swizzle_override = {7, 0, 7, 7};
      return emit_varying_pos(store_info, intr, &swizzle_override);
   }
   case VARYING_SLOT_VIEWPORT: {
      std::array<uint8_t, 4> swizzle_override = {7, 7, 7, 0};
      return emit_varying_pos(store_info, intr, &swizzle_override) &&
             emit_varying_param(store_info, intr);
   }
   case VARYING_SLOT_CLIP_VERTEX:
      return emit_clip_vertices(store_info, intr);
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CLIP_DIST1: {
      bool success = emit_varying_pos(store_info, intr);
      m_num_clip_dist += 4;
      if (!nir_intrinsic_io_semantics(&intr).no_varying)
         success &= emit_varying_param(store_info, intr);
      return success;
   }
   case VARYING_SLOT_LAYER: {
      m_out_misc_write = 1;
      m_vs_out_layer = 1;
      std::array<uint8_t, 4> swz = {7, 7, 0, 7};
      return emit_varying_pos(store_info, intr, &swz) &&
             emit_varying_param(store_info, intr);
   }
   case VARYING_SLOT_VIEW_INDEX:
      return emit_varying_pos(store_info, intr) &&
             emit_varying_param(store_info, intr);

   default:
      return emit_varying_param(store_info, intr);
   }
   return false;
}

bool VertexExportForFs::emit_clip_vertices(const store_loc &store_info, const nir_intrinsic_instr &instr)
{
   auto& vf = m_parent->value_factory();

   m_cc_dist_mask = 0xff;
   m_clip_dist_write = 0xff;

   m_clip_vertex = vf.src_vec4(instr.src[store_info.data_loc], pin_group, {0, 1, 2, 3});

   m_output_registers[nir_intrinsic_base(&instr)] = &m_clip_vertex;

   return true;
}

void VertexExportForFs::get_shader_info(r600_shader *sh_info) const
{
   sh_info->cc_dist_mask = m_cc_dist_mask;
   sh_info->clip_dist_write = m_clip_dist_write;
   sh_info->vs_as_gs_a = m_vs_as_gs_a;
   sh_info->vs_out_edgeflag = m_out_edgeflag;
   sh_info->vs_out_viewport = m_out_viewport;
   sh_info->vs_out_misc_write = m_out_misc_write;
   sh_info->vs_out_point_size = m_out_point_size;
   sh_info->vs_out_layer = m_vs_out_layer;
}

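/* finalize() runs after all outputs have been recorded: it appends the
 * primitive ID as an extra parameter when the VS runs in GS_A query mode,
 * makes sure that at least one position and one parameter export exist
 * (emitting dummy exports with an all-unused {7,7,7,7} swizzle if
 * necessary, presumably because one export of each kind must be flagged as
 * the last one), and finally emits the stream-out instructions when
 * transform feedback is active. */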
void VertexExportForFs::finalize()
{
   if (m_vs_as_gs_a) {
      auto primid = m_parent->value_factory().temp_vec4(pin_group, {2, 7, 7, 7});
      m_parent->emit_instruction(new AluInstr(op1_mov, primid[0], m_parent->primitive_id(), AluInstr::last_write));
      int param = m_last_param_export ? m_last_param_export->location() + 1 : 0;

      m_last_param_export = new ExportInstr(ExportInstr::param, param, primid);
      m_parent->emit_instruction(m_last_param_export);

      ShaderOutput output(m_parent->noutputs(), TGSI_SEMANTIC_PRIMID, 1);
      output.set_sid(0);
      output.override_spi_sid(m_vs_prim_id_out);
      m_parent->add_output(output);
   }

   if (!m_last_pos_export) {
      RegisterVec4 value(0, false, {7, 7, 7, 7});
      m_last_pos_export = new ExportInstr(ExportInstr::pos, 0, value);
      m_parent->emit_instruction(m_last_pos_export);
   }

   if (!m_last_param_export) {
      RegisterVec4 value(0, false, {7, 7, 7, 7});
      m_last_param_export = new ExportInstr(ExportInstr::param, 0, value);
      m_parent->emit_instruction(m_last_param_export);
   }

   m_last_pos_export->set_is_last_export(true);
   m_last_param_export->set_is_last_export(true);

   if (m_so_info && m_so_info->num_outputs)
      emit_stream(-1);
}

void VertexShader::do_get_shader_info(r600_shader *sh_info)
{
   sh_info->processor_type = PIPE_SHADER_VERTEX;
   m_export_stage->get_shader_info(sh_info);
}

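/* emit_varying_pos() derives the export swizzle from the NIR write mask
 * shifted by the component base.  A small worked example (illustrative
 * values, not taken from a real shader): write_mask = 0x3 with frac = 1
 * gives a shifted mask of 0b0110, so the swizzle becomes {7, 0, 1, 7}:
 * export channels y and z read source elements 0 and 1 while x and w stay
 * unused (7).  The EDGE flag is additionally clamped and converted to an
 * integer before it is exported. */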
bool VertexExportForFs::emit_varying_pos(const store_loc &store_info, nir_intrinsic_instr& intr,
                                         std::array<uint8_t, 4> *swizzle_override)
{
   RegisterVec4::Swizzle swizzle;
   uint32_t write_mask = 0;

   write_mask = nir_intrinsic_write_mask(&intr) << store_info.frac;

   if (!swizzle_override) {
      for (int i = 0; i < 4; ++i)
         swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;
   } else
      std::copy(swizzle_override->begin(), swizzle_override->end(), swizzle.begin());

   int export_slot = 0;

   auto in_value = m_parent->value_factory().src_vec4(intr.src[0], pin_group, swizzle);
   auto& value = in_value;
   RegisterVec4 out_value = m_parent->value_factory().temp_vec4(pin_group, swizzle);

   switch (store_info.location) {
   case VARYING_SLOT_EDGE: {
      m_out_misc_write = true;
      m_out_edgeflag = true;
      auto src = m_parent->value_factory().src(intr.src[0], 0);
      auto clamped = m_parent->value_factory().temp_register();
      m_parent->emit_instruction(new AluInstr(op1_mov, clamped, src, {alu_write, alu_dst_clamp, alu_last_instr}));
      auto alu = new AluInstr(op1_flt_to_int, out_value[1], clamped, AluInstr::last_write);
      if (m_parent->chip_class() < ISA_CC_EVERGREEN)
         alu->set_alu_flag(alu_is_trans);
      m_parent->emit_instruction(alu);

      value = out_value;
   }
      FALLTHROUGH;
   case VARYING_SLOT_PSIZ:
      m_out_misc_write = true;
      m_out_point_size = true;
      FALLTHROUGH;
   case VARYING_SLOT_LAYER:
      export_slot = 1;
      break;
   case VARYING_SLOT_VIEWPORT:
      m_out_misc_write = true;
      m_out_viewport = true;
      export_slot = 1;
      break;
   case VARYING_SLOT_POS:
      break;
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CLIP_DIST1:
      m_cc_dist_mask |= write_mask << (4 * (store_info.location - VARYING_SLOT_CLIP_DIST0));
      m_clip_dist_write |= write_mask << (4 * (store_info.location - VARYING_SLOT_CLIP_DIST0));
      export_slot = m_cur_clip_pos++;
      break;
   default:
      sfn_log << SfnLog::err << __func__ << ": Unsupported location "
              << store_info.location << "\n";
      return false;
   }

   m_last_pos_export = new ExportInstr(ExportInstr::pos, export_slot, value);

   m_output_registers[nir_intrinsic_base(&intr)] = &m_last_pos_export->value();

   m_parent->emit_instruction(m_last_pos_export);

   return true;
}

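/* emit_varying_param() copies the written components into a fresh
 * temporary (pinned as a group only when more than one component is
 * written) and exports it as a parameter at the slot recorded for this
 * output in the shader's output table. */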
bool VertexExportForFs::emit_varying_param(const store_loc &store_info, nir_intrinsic_instr& intr)
{
   sfn_log << SfnLog::io << __func__ << ": emit DDL: " << store_info.driver_location << "\n";

   int write_mask = nir_intrinsic_write_mask(&intr) << store_info.frac;
   RegisterVec4::Swizzle swizzle;
   for (int i = 0; i < 4; ++i)
      swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;

   Pin pin = util_bitcount(write_mask) > 1 ? pin_group : pin_free;

   int export_slot = m_parent->output(nir_intrinsic_base(&intr)).pos();
   auto value = m_parent->value_factory().temp_vec4(pin, swizzle);

   AluInstr *alu = nullptr;
   for (int i = 0; i < 4; ++i) {
      if (swizzle[i] < 4) {
         alu = new AluInstr(op1_mov, value[i], m_parent->value_factory().src(intr.src[0], swizzle[i]),
                            AluInstr::write);
         m_parent->emit_instruction(alu);
      }
   }
   if (alu)
      alu->set_alu_flag(alu_last_instr);

   m_last_param_export = new ExportInstr(ExportInstr::param, export_slot, value);
   m_output_registers[nir_intrinsic_base(&intr)] = &m_last_param_export->value();

   m_parent->emit_instruction(m_last_param_export);

   return true;
}

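/* emit_stream() writes the transform-feedback outputs.  Components that do
 * not already sit at the right channel are copied into a scratch vec4
 * first (see the comment inside the loop), and every emitted output sets a
 * bit in the enabled-buffers mask.  The mask keeps four bits per stream:
 * for example (hypothetical values), buffer 2 on stream 1 sets
 * (1 << 2) << (1 * 4), i.e. bit 6. */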
bool VertexExportForFs::emit_stream(int stream)
{
   assert(m_so_info);
   if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
      R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
      return false;
   }
   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
      if (m_so_info->output[i].output_buffer >= 4) {
         R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
                  m_so_info->output[i].output_buffer);
         return false;
      }
   }
   const RegisterVec4 *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
   unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
   std::vector<RegisterVec4> tmp(m_so_info->num_outputs);

   /* Initialize locations where the outputs are stored. */
   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
      if (stream != -1 && stream != m_so_info->output[i].stream)
         continue;

      sfn_log << SfnLog::instr << "Emit stream " << i
              << " with register index " << m_so_info->output[i].register_index << "  so_gpr:";

      so_gpr[i] = output_register(m_so_info->output[i].register_index);

      if (!so_gpr[i]) {
         sfn_log << SfnLog::err << "\nERR: register index "
                 << m_so_info->output[i].register_index
                 << " doesn't correspond to an output register\n";
         return false;
      }
      start_comp[i] = m_so_info->output[i].start_component;

      /* Lower outputs with dst_offset < start_component.
       *
       * We can only output 4D vectors with a write mask, e.g. we can
       * only output the W component at offset 3, etc. If we want
       * to store Y, Z, or W at buffer offset 0, we need to use MOV
       * to move it to X and output X. */
      bool need_copy = m_so_info->output[i].dst_offset < m_so_info->output[i].start_component;

      int sc = m_so_info->output[i].start_component;
      for (int j = 0; j < m_so_info->output[i].num_components; j++) {
         if ((*so_gpr[i])[j + sc]->chan() != j + sc) {
            need_copy = true;
            break;
         }
      }
      if (need_copy) {
         RegisterVec4::Swizzle swizzle = {0, 1, 2, 3};
         for (auto j = m_so_info->output[i].num_components; j < 4; ++j)
            swizzle[j] = 7;
         tmp[i] = m_parent->value_factory().temp_vec4(pin_group, swizzle);

         AluInstr *alu = nullptr;
         for (int j = 0; j < m_so_info->output[i].num_components; j++) {
            alu = new AluInstr(op1_mov, tmp[i][j], (*so_gpr[i])[j + sc], {alu_write});
            m_parent->emit_instruction(alu);
         }
         if (alu)
            alu->set_alu_flag(alu_last_instr);

         start_comp[i] = 0;
         so_gpr[i] = &tmp[i];
      }
      sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
   }

   uint32_t enabled_stream_buffers_mask = 0;
   /* Write outputs to buffers. */
   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
      sfn_log << SfnLog::instr << "Write output buffer " << i
              << " with register index " << m_so_info->output[i].register_index << "\n";

      auto out_stream =
            new StreamOutInstr(*so_gpr[i],
                               m_so_info->output[i].num_components,
                               m_so_info->output[i].dst_offset - start_comp[i],
                               ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
                               m_so_info->output[i].output_buffer,
                               m_so_info->output[i].stream);
      m_parent->emit_instruction(out_stream);
      enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
   }
   m_parent->combine_enabled_stream_buffers_mask(enabled_stream_buffers_mask);
   return true;
}

const RegisterVec4 *VertexExportForFs::output_register(int loc) const
{
   const RegisterVec4 *retval = nullptr;
   auto val = m_output_registers.find(loc);
   if (val != m_output_registers.end())
      retval = val->second;
   return retval;
}

VertexShader::VertexShader(const pipe_stream_output_info *so_info, r600_shader *gs_shader, r600_shader_key& key):
   VertexStageShader("VS"),
   m_vs_as_gs_a(key.vs.as_gs_a)
{
   if (key.vs.as_es)
      m_export_stage = new VertexExportForGS(this, gs_shader);
   else if (key.vs.as_ls)
      m_export_stage = new VertexExportForTCS(this);
   else
      m_export_stage = new VertexExportForFs(this, so_info, key);
}

bool VertexShader::do_scan_instruction(nir_instr *instr)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   auto intr = nir_instr_as_intrinsic(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_load_input: {
      int vtx_register = nir_intrinsic_base(intr) + 1;
      if (m_last_vertex_atribute_register < vtx_register)
         m_last_vertex_atribute_register = vtx_register;
      return true;
   }
   case nir_intrinsic_store_output: {
      int driver_location = nir_intrinsic_base(intr);
      int location = nir_intrinsic_io_semantics(intr).location;
      auto semantic = r600_get_varying_semantic(location);
      tgsi_semantic name = (tgsi_semantic)semantic.first;
      unsigned sid = semantic.second;
      auto write_mask = nir_intrinsic_write_mask(intr);

      if (location == VARYING_SLOT_LAYER)
         write_mask = 4;

      ShaderOutput output(driver_location, name, write_mask);
      output.set_sid(sid);

      switch (location) {
      case VARYING_SLOT_CLIP_DIST0:
      case VARYING_SLOT_CLIP_DIST1:
         if (nir_intrinsic_io_semantics(intr).no_varying)
            break;
         FALLTHROUGH;
      case VARYING_SLOT_VIEWPORT:
      case VARYING_SLOT_LAYER:
      case VARYING_SLOT_VIEW_INDEX:
      default:
         output.set_is_param(true);
         FALLTHROUGH;
      case VARYING_SLOT_PSIZ:
      case VARYING_SLOT_POS:
      case VARYING_SLOT_CLIP_VERTEX:
      case VARYING_SLOT_EDGE:
         add_output(output);
         break;
      }
      break;
   }
   case nir_intrinsic_load_vertex_id:
      m_sv_values.set(es_vertexid);
      break;
   case nir_intrinsic_load_instance_id:
      m_sv_values.set(es_instanceid);
      break;
   case nir_intrinsic_load_primitive_id:
      m_sv_values.set(es_primitive_id);
      break;
   case nir_intrinsic_load_tcs_rel_patch_id_r600:
      m_sv_values.set(es_rel_patch_id);
      break;
   default:
      return false;
   }

   return true;
}

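/* Vertex attributes are expected in consecutive GPRs starting at register
 * 1 (driver_location + 1); register 0 is reserved for the system values
 * set up in do_allocate_reserved_registers() below.  For SSA destinations
 * the pinned attribute register is injected directly, otherwise it is
 * copied with a MOV. */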
bool VertexShader::load_input(nir_intrinsic_instr *intr)
{
   unsigned driver_location = nir_intrinsic_base(intr);
   unsigned location = nir_intrinsic_io_semantics(intr).location;
   auto& vf = value_factory();

   AluInstr *ir = nullptr;
   if (location < VERT_ATTRIB_MAX) {
      for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
         auto src = vf.allocate_pinned_register(driver_location + 1, i);
         src->pin_live_range(true);
         if (intr->dest.is_ssa)
            vf.inject_value(intr->dest, i, src);
         else {
            ir = new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_none), src, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_alu_flag(alu_last_instr);

      ShaderInput input(driver_location, location);
      input.set_gpr(driver_location + 1);
      add_input(input);
      return true;
   }
   fprintf(stderr, "r600-NIR: Unimplemented load_input for %d\n", location);
   return false;
}

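/* System values live in fixed channels of register 0, matching what is
 * pinned below: vertex ID in x, the relative patch ID in y, the primitive
 * ID in z and the instance ID in w.  The return value, presumably the
 * first register index free for general allocation, is one past the last
 * vertex attribute register. */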
int VertexShader::do_allocate_reserved_registers()
{
   if (m_sv_values.test(es_vertexid)) {
      m_vertex_id = value_factory().allocate_pinned_register(0, 0);
      m_vertex_id->pin_live_range(true);
   }

   if (m_sv_values.test(es_instanceid)) {
      m_instance_id = value_factory().allocate_pinned_register(0, 3);
      m_instance_id->pin_live_range(true);
   }

   if (m_sv_values.test(es_primitive_id) || m_vs_as_gs_a) {
      auto primitive_id = value_factory().allocate_pinned_register(0, 2);
      primitive_id->pin_live_range(true);
      set_primitive_id(primitive_id);
   }

   if (m_sv_values.test(es_rel_patch_id)) {
      m_rel_vertex_id = value_factory().allocate_pinned_register(0, 1);
      m_rel_vertex_id->pin_live_range(true);
   }

   return m_last_vertex_atribute_register + 1;
}

bool VertexShader::store_output(nir_intrinsic_instr *intr)
{
   return m_export_stage->store_output(*intr);
}

bool VertexShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_vertex_id:
      return emit_simple_mov(intr->dest, 0, m_vertex_id);
   case nir_intrinsic_load_instance_id:
      return emit_simple_mov(intr->dest, 0, m_instance_id);
   case nir_intrinsic_load_primitive_id:
      return emit_simple_mov(intr->dest, 0, primitive_id());
   case nir_intrinsic_load_tcs_rel_patch_id_r600:
      return emit_simple_mov(intr->dest, 0, m_rel_vertex_id);
   default:
      return false;
   }
}

void VertexShader::do_finalize()
{
   m_export_stage->finalize();
}

bool VertexShader::read_prop(std::istream& is)
{
   (void)is;
   return false;
}

void VertexShader::do_print_properties(std::ostream& os) const
{
   (void)os;
}

VertexExportForGS::VertexExportForGS(VertexStageShader *parent,
                                     const r600_shader *gs_shader):
   VertexExportStage(parent),
   m_gs_shader(gs_shader)
{
}

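/* When the VS feeds a geometry shader it does not export directly.
 * Instead each output is matched against the GS inputs by semantic name
 * and sid, and the value is written to the ring buffer the GS reads from,
 * at the ring offset recorded for that GS input.  Outputs the GS never
 * consumes are dropped with just a log message; VIEWPORT only sets the
 * misc-write flags. */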
bool VertexExportForGS::do_store_output(const store_loc &store_info, nir_intrinsic_instr& instr)
{
   int ring_offset = -1;
   auto out_io = m_parent->output(store_info.driver_location);

   sfn_log << SfnLog::io << "check output " << store_info.driver_location
           << " name=" << out_io.name() << " sid=" << out_io.sid() << "\n";

   for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
      auto& in_io = m_gs_shader->input[k];
      sfn_log << SfnLog::io << "  against  " << k << " name=" << in_io.name << " sid=" << in_io.sid << "\n";

      if (in_io.name == out_io.name() &&
          in_io.sid == out_io.sid()) {
         ring_offset = in_io.ring_offset;
         break;
      }
   }

   if (store_info.location == VARYING_SLOT_VIEWPORT) {
      m_vs_out_viewport = 1;
      m_vs_out_misc_write = 1;
      return true;
   }

   if (ring_offset == -1) {
      sfn_log << SfnLog::err << "VS defines output at "
              << store_info.driver_location << " name=" << out_io.name()
              << " sid=" << out_io.sid() << " that is not consumed as GS input\n";
      return true;
   }

   RegisterVec4::Swizzle src_swz = {7, 7, 7, 7};
   for (int i = 0; i < 4; ++i)
      src_swz[i] = i < instr.num_components ? i : 7;

   auto value = m_parent->value_factory().temp_vec4(pin_group, src_swz);

   AluInstr *ir = nullptr;
   for (unsigned int i = 0; i < instr.num_components; ++i) {
      ir = new AluInstr(op1_mov, value[i],
                        m_parent->value_factory().src(instr.src[store_info.data_loc], i),
                        AluInstr::write);
      m_parent->emit_instruction(ir);
   }
   if (ir)
      ir->set_alu_flag(alu_last_instr);

   m_parent->emit_instruction(new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write, value,
                                                  ring_offset >> 2, 4, nullptr));

   if (store_info.location == VARYING_SLOT_CLIP_DIST0 ||
       store_info.location == VARYING_SLOT_CLIP_DIST1)
      m_num_clip_dist += 4;

   return true;
}

void VertexExportForGS::finalize()
{
}

void VertexExportForGS::get_shader_info(r600_shader *sh_info) const
{
   sh_info->vs_out_viewport = m_vs_out_viewport;
   sh_info->vs_out_misc_write = m_vs_out_misc_write;
   sh_info->vs_as_es = true;
}

VertexExportForTCS::VertexExportForTCS(VertexStageShader *parent):
   VertexExportStage(parent)
{
}

void VertexExportForTCS::finalize()
{
}

void VertexExportForTCS::get_shader_info(r600_shader *sh_info) const
{
   sh_info->vs_as_ls = 1;
}

bool VertexExportForTCS::do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr)
{
   (void)store_info;
   (void)intr;
   return true;
}

}