• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_shader_vs.h"
28 
29 #include "../r600_asm.h"
30 
31 #include "sfn_debug.h"
32 #include "sfn_instr_alugroup.h"
33 #include "sfn_instr_export.h"
34 
35 namespace r600 {
36 
37 uint32_t
enabled_stream_buffers_mask() const38 VertexStageShader::enabled_stream_buffers_mask() const
39 {
40    return m_enabled_stream_buffers_mask;
41 }
42 
43 void
combine_enabled_stream_buffers_mask(uint32_t mask)44 VertexStageShader::combine_enabled_stream_buffers_mask(uint32_t mask)
45 {
46    m_enabled_stream_buffers_mask = mask;
47 }
48 
49 bool
store_output(nir_intrinsic_instr & intr)50 VertexExportStage::store_output(nir_intrinsic_instr& intr)
51 {
52    auto index = nir_src_as_const_value(intr.src[1]);
53    assert(index && "Indirect outputs not supported");
54 
55    const store_loc store_info = {nir_intrinsic_component(&intr),
56                                  nir_intrinsic_io_semantics(&intr).location,
57                                  (unsigned)nir_intrinsic_base(&intr) + index->u32,
58                                  0};
59 
60    return do_store_output(store_info, intr);
61 }
62 
/* Common base of the VS export stages; records the owning shader so the
 * derived stages can emit instructions and query shader state through it. */
VertexExportStage::VertexExportStage(VertexStageShader *parent):
    m_parent(parent)
{
}
67 
/* Export stage used when the VS feeds rasterization/fragment shading.
 * @param so_info  stream output (transform feedback) layout; may be null
 * @param key      shader key; vs.as_gs_a enables the primitive-id
 *                 pass-through path used when emulating a GS pipeline */
VertexExportForFs::VertexExportForFs(VertexStageShader *parent,
                                     const pipe_stream_output_info *so_info,
                                     const r600_shader_key& key):
    VertexExportStage(parent),
    m_vs_as_gs_a(key.vs.as_gs_a),
    m_so_info(so_info)
{
}
76 
77 bool
do_store_output(const store_loc & store_info,nir_intrinsic_instr & intr)78 VertexExportForFs::do_store_output(const store_loc& store_info, nir_intrinsic_instr& intr)
79 {
80    switch (store_info.location) {
81 
82    case VARYING_SLOT_PSIZ:
83       m_writes_point_size = true;
84       FALLTHROUGH;
85    case VARYING_SLOT_POS:
86       return emit_varying_pos(store_info, intr);
87    case VARYING_SLOT_EDGE: {
88       std::array<uint8_t, 4> swizzle_override = {7, 0, 7, 7};
89       return emit_varying_pos(store_info, intr, &swizzle_override);
90    }
91    case VARYING_SLOT_VIEWPORT: {
92       std::array<uint8_t, 4> swizzle_override = {7, 7, 7, 0};
93       return emit_varying_pos(store_info, intr, &swizzle_override) &&
94              emit_varying_param(store_info, intr);
95    }
96    case VARYING_SLOT_CLIP_VERTEX:
97       return emit_clip_vertices(store_info, intr);
98    case VARYING_SLOT_CLIP_DIST0:
99    case VARYING_SLOT_CLIP_DIST1: {
100       bool success = emit_varying_pos(store_info, intr);
101       m_num_clip_dist += 4;
102       if (!nir_intrinsic_io_semantics(&intr).no_varying)
103          success &= emit_varying_param(store_info, intr);
104       return success;
105    }
106    case VARYING_SLOT_LAYER: {
107       m_out_misc_write = 1;
108       m_vs_out_layer = 1;
109       std::array<uint8_t, 4> swz = {7, 7, 0, 7};
110       return emit_varying_pos(store_info, intr, &swz) &&
111              emit_varying_param(store_info, intr);
112    }
113    case VARYING_SLOT_VIEW_INDEX:
114       return emit_varying_pos(store_info, intr) && emit_varying_param(store_info, intr);
115 
116    default:
117       return emit_varying_param(store_info, intr);
118       return false;
119    }
120 }
121 
122 bool
emit_clip_vertices(const store_loc & store_info,const nir_intrinsic_instr & instr)123 VertexExportForFs::emit_clip_vertices(const store_loc& store_info,
124                                       const nir_intrinsic_instr& instr)
125 {
126    auto& vf = m_parent->value_factory();
127 
128    m_cc_dist_mask = 0xff;
129    m_clip_dist_write = 0xff;
130 
131    m_clip_vertex = vf.src_vec4(instr.src[store_info.data_loc], pin_group, {0, 1, 2, 3});
132 
133    m_output_registers[nir_intrinsic_base(&instr)] = &m_clip_vertex;
134 
135    return true;
136 }
137 
138 void
get_shader_info(r600_shader * sh_info) const139 VertexExportForFs::get_shader_info(r600_shader *sh_info) const
140 {
141    sh_info->cc_dist_mask = m_cc_dist_mask;
142    sh_info->clip_dist_write = m_clip_dist_write;
143    sh_info->vs_as_gs_a = m_vs_as_gs_a;
144    sh_info->vs_out_edgeflag = m_out_edgeflag;
145    sh_info->vs_out_viewport = m_out_viewport;
146    sh_info->vs_out_misc_write = m_out_misc_write;
147    sh_info->vs_out_point_size = m_out_point_size;
148    sh_info->vs_out_layer = m_vs_out_layer;
149 }
150 
/* Finish export handling: add the primitive-id parameter when the VS
 * emulates the GS output path (vs_as_gs_a), make sure at least one position
 * and one parameter export exist, mark the last exports, and emit stream
 * output instructions when transform feedback is active. */
void
VertexExportForFs::finalize()
{
   if (m_vs_as_gs_a) {
      /* Move the primitive id into the first channel of a fresh register
       * (remaining channels unused) and export it as one extra parameter
       * after all regular ones. */
      auto primid = m_parent->value_factory().temp_vec4(pin_group, {2, 7, 7, 7});
      m_parent->emit_instruction(new AluInstr(
         op1_mov, primid[0], m_parent->primitive_id(), AluInstr::last_write));
      int param = m_last_param_export ? m_last_param_export->location() + 1 : 0;

      m_last_param_export = new ExportInstr(ExportInstr::param, param, primid);
      m_parent->emit_instruction(m_last_param_export);

      ShaderOutput output(m_parent->noutputs(), 1, VARYING_SLOT_PRIMITIVE_ID);
      output.set_export_param(param);
      m_parent->add_output(output);
   }

   /* Emit a dummy position export if the shader wrote none — presumably
    * at least one pos export is required downstream; TODO confirm against
    * the hardware docs. */
   if (!m_last_pos_export) {
      RegisterVec4 value(0, false, {7, 7, 7, 7});
      m_last_pos_export = new ExportInstr(ExportInstr::pos, 0, value);
      m_parent->emit_instruction(m_last_pos_export);
   }

   /* Same for parameter exports. */
   if (!m_last_param_export) {
      RegisterVec4 value(0, false, {7, 7, 7, 7});
      m_last_param_export = new ExportInstr(ExportInstr::param, 0, value);
      m_parent->emit_instruction(m_last_param_export);
   }

   m_last_pos_export->set_is_last_export(true);
   m_last_param_export->set_is_last_export(true);

   /* -1 == emit the outputs of all streams (see emit_stream). */
   if (m_so_info && m_so_info->num_outputs)
      emit_stream(-1);
}
186 
187 void
do_get_shader_info(r600_shader * sh_info)188 VertexShader::do_get_shader_info(r600_shader *sh_info)
189 {
190    sh_info->processor_type = PIPE_SHADER_VERTEX;
191    m_export_stage->get_shader_info(sh_info);
192 }
193 
/* Emit a position-type export (position, point size, edge flag, layer,
 * viewport index or clip distances) for the given output store.
 * @param swizzle_override  if non-null, use these fixed channel selects
 *        (7 == channel unused) instead of deriving the swizzle from the
 *        nir write mask.
 * Returns false for varying locations not handled here. */
bool
VertexExportForFs::emit_varying_pos(const store_loc& store_info,
                                    nir_intrinsic_instr& intr,
                                    std::array<uint8_t, 4> *swizzle_override)
{
   RegisterVec4::Swizzle swizzle;
   uint32_t write_mask = 0;

   /* Shift the mask so channels below the component base (frac) are unset. */
   write_mask = nir_intrinsic_write_mask(&intr) << store_info.frac;

   if (!swizzle_override) {
      for (int i = 0; i < 4; ++i)
         swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;
   } else
      std::copy(swizzle_override->begin(), swizzle_override->end(), swizzle.begin());

   int export_slot = 0;

   auto in_value = m_parent->value_factory().src_vec4(intr.src[0], pin_group, swizzle);
   /* NOTE: 'value' aliases in_value; the EDGE case below replaces its
    * contents (assignment through the reference, not a rebind) with the
    * converted temporary. */
   auto& value = in_value;
   RegisterVec4 out_value = m_parent->value_factory().temp_vec4(pin_group, swizzle);

   switch (store_info.location) {
   case VARYING_SLOT_EDGE: {
      m_out_misc_write = true;
      m_out_edgeflag = true;
      /* The edge flag is clamped and converted to integer before export. */
      auto src = m_parent->value_factory().src(intr.src[0], 0);
      auto clamped = m_parent->value_factory().temp_register();
      m_parent->emit_instruction(
         new AluInstr(op1_mov, clamped, src, {alu_write, alu_dst_clamp, alu_last_instr}));
      auto alu =
         new AluInstr(op1_flt_to_int, out_value[1], clamped, AluInstr::last_write);
      /* Pre-Evergreen chips need the alu_is_trans flag on this conversion. */
      if (m_parent->chip_class() < ISA_CC_EVERGREEN)
         alu->set_alu_flag(alu_is_trans);
      m_parent->emit_instruction(alu);

      value = out_value;
   }
      FALLTHROUGH;
   case VARYING_SLOT_PSIZ:
      m_out_misc_write = true;
      m_out_point_size = true;
      FALLTHROUGH;
   case VARYING_SLOT_LAYER:
      /* Edge flag / point size / layer share the misc export slot 1. */
      export_slot = 1;
      break;
   case VARYING_SLOT_VIEWPORT:
      m_out_misc_write = true;
      m_out_viewport = true;
      export_slot = 1;
      break;
   case VARYING_SLOT_POS:
      break;
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CLIP_DIST1:
      /* Each clip distance slot covers four channels of the masks. */
      m_cc_dist_mask |= write_mask
                        << (4 * (store_info.location - VARYING_SLOT_CLIP_DIST0));
      m_clip_dist_write |= write_mask
                           << (4 * (store_info.location - VARYING_SLOT_CLIP_DIST0));
      export_slot = m_cur_clip_pos++;
      break;
   default:
      sfn_log << SfnLog::err << __func__ << "Unsupported location " << store_info.location
              << "\n";
      return false;
   }

   m_last_pos_export = new ExportInstr(ExportInstr::pos, export_slot, value);

   /* Keep the exported value reachable for stream output emission. */
   m_output_registers[nir_intrinsic_base(&intr)] = &m_last_pos_export->value();

   m_parent->emit_instruction(m_last_pos_export);

   return true;
}
269 
/* Emit a PARAM export for a generic varying output. The parameter slot was
 * assigned to the output when it was registered during shader scanning. */
bool
VertexExportForFs::emit_varying_param(const store_loc& store_info,
                                      nir_intrinsic_instr& intr)
{
   sfn_log << SfnLog::io << __func__ << ": emit DDL: " << store_info.driver_location
           << "\n";

   /* Channels below the component base (frac) are unused (7). */
   int write_mask = nir_intrinsic_write_mask(&intr) << store_info.frac;
   RegisterVec4::Swizzle swizzle;
   for (int i = 0; i < 4; ++i)
      swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;

   /* Multi-channel values must stay together in one register group. */
   Pin pin = util_bitcount(write_mask) > 1 ? pin_group : pin_free;

   int export_slot = m_parent->output(nir_intrinsic_base(&intr)).export_param();
   assert(export_slot >= 0);
   auto value = m_parent->value_factory().temp_vec4(pin, swizzle);

   /* Copy each written component into the export register; only the final
    * copy closes the ALU group. */
   AluInstr *alu = nullptr;
   for (int i = 0; i < 4; ++i) {
      if (swizzle[i] < 4) {
         alu = new AluInstr(op1_mov,
                            value[i],
                            m_parent->value_factory().src(intr.src[0], swizzle[i]),
                            AluInstr::write);
         m_parent->emit_instruction(alu);
      }
   }
   if (alu)
      alu->set_alu_flag(alu_last_instr);

   /* Remember the export so finalize() can flag the last one, and keep the
    * value reachable for stream output emission. */
   m_last_param_export = new ExportInstr(ExportInstr::param, export_slot, value);
   m_output_registers[nir_intrinsic_base(&intr)] = &m_last_param_export->value();

   m_parent->emit_instruction(m_last_param_export);

   return true;
}
308 
/* Emit the stream output (transform feedback) writes for the recorded
 * outputs. A stream value of -1 selects the outputs of all streams.
 * Returns false when the stream output configuration exceeds the limits
 * or references an output that was never exported. */
bool
VertexExportForFs::emit_stream(int stream)
{
   assert(m_so_info);
   if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
      R600_ASM_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
      return false;
   }
   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
      if (m_so_info->output[i].output_buffer >= 4) {
         R600_ASM_ERR("Exceeded the max number of stream output buffers, got: %d\n",
                      m_so_info->output[i].output_buffer);
         return false;
      }
   }
   const RegisterVec4 *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
   unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
   std::vector<RegisterVec4> tmp(m_so_info->num_outputs);

   /* Initialize locations where the outputs are stored. */
   /* NOTE(review): outputs skipped by the stream filter below leave
    * so_gpr[i]/start_comp[i] uninitialized, yet the write loop further down
    * visits every output. This is only safe for stream == -1, which is how
    * finalize() calls it — verify before adding other callers. */
   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
      if (stream != -1 && stream != m_so_info->output[i].stream)
         continue;

      sfn_log << SfnLog::instr << "Emit stream " << i << " with register index "
              << m_so_info->output[i].register_index << "  so_gpr:";

      so_gpr[i] = output_register(m_so_info->output[i].register_index);

      if (!so_gpr[i]) {
         sfn_log << SfnLog::err << "\nERR: register index "
                 << m_so_info->output[i].register_index
                 << " doesn't correspond to an output register\n";
         return false;
      }
      start_comp[i] = m_so_info->output[i].start_component;
      /* Lower outputs with dst_offset < start_component.
       *
       * We can only output 4D vectors with a write mask, e.g. we can
       * only output the W component at offset 3, etc. If we want
       * to store Y, Z, or W at buffer offset 0, we need to use MOV
       * to move it to X and output X. */

      bool need_copy =
         m_so_info->output[i].dst_offset < m_so_info->output[i].start_component;

      /* Also copy when the source channels are not laid out contiguously
       * starting at their own channel index. */
      int sc = m_so_info->output[i].start_component;
      for (int j = 0; j < m_so_info->output[i].num_components; j++) {
         if ((*so_gpr[i])[j + sc]->chan() != j + sc) {
            need_copy = true;
            break;
         }
      }
      if (need_copy) {
         RegisterVec4::Swizzle swizzle = {0, 1, 2, 3};
         for (auto j = m_so_info->output[i].num_components; j < 4; ++j)
            swizzle[j] = 7;
         tmp[i] = m_parent->value_factory().temp_vec4(pin_group, swizzle);

         /* Repack the components to start at X; only the last copy closes
          * the ALU group. */
         AluInstr *alu = nullptr;
         for (int j = 0; j < m_so_info->output[i].num_components; j++) {
            alu = new AluInstr(op1_mov, tmp[i][j], (*so_gpr[i])[j + sc], {alu_write});
            m_parent->emit_instruction(alu);
         }
         if (alu)
            alu->set_alu_flag(alu_last_instr);

         start_comp[i] = 0;
         so_gpr[i] = &tmp[i];
      }
      sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
   }

   uint32_t enabled_stream_buffers_mask = 0;
   /* Write outputs to buffers. */
   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
      sfn_log << SfnLog::instr << "Write output buffer " << i << " with register index "
              << m_so_info->output[i].register_index << "\n";

      auto out_stream =
         new StreamOutInstr(*so_gpr[i],
                            m_so_info->output[i].num_components,
                            m_so_info->output[i].dst_offset - start_comp[i],
                            ((1 << m_so_info->output[i].num_components) - 1)
                               << start_comp[i],
                            m_so_info->output[i].output_buffer,
                            m_so_info->output[i].stream);
      m_parent->emit_instruction(out_stream);
      /* One nibble per stream, one bit per buffer. */
      enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer)
                                     << m_so_info->output[i].stream * 4;
   }
   m_parent->combine_enabled_stream_buffers_mask(enabled_stream_buffers_mask);
   return true;
}
403 
404 const RegisterVec4 *
output_register(int loc) const405 VertexExportForFs::output_register(int loc) const
406 {
407    const RegisterVec4 *retval = nullptr;
408    auto val = m_output_registers.find(loc);
409    if (val != m_output_registers.end())
410       retval = val->second;
411    return retval;
412 }
413 
VertexShader(const pipe_stream_output_info * so_info,r600_shader * gs_shader,const r600_shader_key & key)414 VertexShader::VertexShader(const pipe_stream_output_info *so_info,
415                            r600_shader *gs_shader,
416                            const r600_shader_key& key):
417     VertexStageShader("VS", key.vs.first_atomic_counter),
418     m_vs_as_gs_a(key.vs.as_gs_a)
419 {
420    if (key.vs.as_es)
421       m_export_stage = new VertexExportForGS(this, gs_shader);
422    else if (key.vs.as_ls)
423       m_export_stage = new VertexExportForTCS(this);
424    else
425       m_export_stage = new VertexExportForFs(this, so_info, key);
426 }
427 
/* First pass over the nir shader: record which inputs, outputs and system
 * values the VS uses so registers can be reserved before code generation.
 * Returns true when the instruction is relevant for this stage. */
bool
VertexShader::do_scan_instruction(nir_instr *instr)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   auto intr = nir_instr_as_intrinsic(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_load_input: {
      /* Attribute i is pre-loaded into GPR i + 1 (GPR 0 holds the system
       * values), so track the highest used register. */
      int vtx_register = nir_intrinsic_base(intr) + 1;
      if (m_last_vertex_attribute_register < vtx_register)
         m_last_vertex_attribute_register = vtx_register;
      return true;
   }
   case nir_intrinsic_store_output: {
      auto location = static_cast<gl_varying_slot>(nir_intrinsic_io_semantics(intr).location);

      /* Clip distances never read as varyings get no output slot; they are
       * handled as position-only exports. */
      if (nir_intrinsic_io_semantics(intr).no_varying &&
          (location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1)) {
         break;
      }

      int driver_location = nir_intrinsic_base(intr);

      /* The layer index is always exported in the z channel. */
      int write_mask =
         location == VARYING_SLOT_LAYER ? 1 << 2 : nir_intrinsic_write_mask(intr);

      ShaderOutput output(driver_location, write_mask, location);

      add_output(output);
      break;
   }
   case nir_intrinsic_load_vertex_id:
      m_sv_values.set(es_vertexid);
      break;
   case nir_intrinsic_load_instance_id:
      m_sv_values.set(es_instanceid);
      break;
   case nir_intrinsic_load_primitive_id:
      m_sv_values.set(es_primitive_id);
      break;
   case nir_intrinsic_load_tcs_rel_patch_id_r600:
      m_sv_values.set(es_rel_patch_id);
      break;
   default:
      return false;
   }

   return true;
}
479 
480 bool
load_input(nir_intrinsic_instr * intr)481 VertexShader::load_input(nir_intrinsic_instr *intr)
482 {
483    unsigned driver_location = nir_intrinsic_base(intr);
484    unsigned location = nir_intrinsic_io_semantics(intr).location;
485    auto& vf = value_factory();
486 
487    AluInstr *ir = nullptr;
488    if (location < VERT_ATTRIB_MAX) {
489       for (unsigned i = 0; i < intr->def.num_components; ++i) {
490          auto src = vf.allocate_pinned_register(driver_location + 1, i);
491          src->set_flag(Register::ssa);
492          vf.inject_value(intr->def, i, src);
493       }
494       if (ir)
495          ir->set_alu_flag(alu_last_instr);
496 
497       ShaderInput input(driver_location);
498       input.set_gpr(driver_location + 1);
499       add_input(input);
500       return true;
501    }
502    fprintf(stderr, "r600-NIR: Unimplemented load_deref for %d\n", location);
503    return false;
504 }
505 
506 int
do_allocate_reserved_registers()507 VertexShader::do_allocate_reserved_registers()
508 {
509    if (m_sv_values.test(es_vertexid)) {
510       m_vertex_id = value_factory().allocate_pinned_register(0, 0);
511    }
512 
513    if (m_sv_values.test(es_instanceid)) {
514       m_instance_id = value_factory().allocate_pinned_register(0, 3);
515    }
516 
517    if (m_sv_values.test(es_primitive_id) || m_vs_as_gs_a) {
518       auto primitive_id = value_factory().allocate_pinned_register(0, 2);
519       set_primitive_id(primitive_id);
520    }
521 
522    if (m_sv_values.test(es_rel_patch_id)) {
523       m_rel_vertex_id = value_factory().allocate_pinned_register(0, 1);
524    }
525 
526    return m_last_vertex_attribute_register + 1;
527 }
528 
529 bool
store_output(nir_intrinsic_instr * intr)530 VertexShader::store_output(nir_intrinsic_instr *intr)
531 {
532    return m_export_stage->store_output(*intr);
533 }
534 
535 bool
process_stage_intrinsic(nir_intrinsic_instr * intr)536 VertexShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
537 {
538    switch (intr->intrinsic) {
539    case nir_intrinsic_load_vertex_id:
540       return emit_simple_mov(intr->def, 0, m_vertex_id);
541    case nir_intrinsic_load_instance_id:
542       return emit_simple_mov(intr->def, 0, m_instance_id);
543    case nir_intrinsic_load_primitive_id:
544       return emit_simple_mov(intr->def, 0, primitive_id());
545    case nir_intrinsic_load_tcs_rel_patch_id_r600:
546       return emit_simple_mov(intr->def, 0, m_rel_vertex_id);
547    default:
548       return false;
549    }
550 }
551 
552 void
do_finalize()553 VertexShader::do_finalize()
554 {
555    m_export_stage->finalize();
556 }
557 
/* The VS has no stage-specific properties to parse from a shader dump;
 * always reports failure. */
bool
VertexShader::read_prop(std::istream& is)
{
   (void)is;
   return false;
}
564 
/* The VS has no stage-specific properties to print; intentionally empty. */
void
VertexShader::do_print_properties(std::ostream& os) const
{
   (void)os;
}
570 
/* Export stage used when the VS runs as ES and feeds a geometry shader;
 * keeps the GS around so VS outputs can be matched to GS ring inputs. */
VertexExportForGS::VertexExportForGS(VertexStageShader *parent,
                                     const r600_shader *gs_shader):
    VertexExportStage(parent),
    m_gs_shader(gs_shader)
{
}
577 
/* Write one VS output to the ES->GS ring buffer. The ring offset is found
 * by matching the output's varying slot against the inputs consumed by the
 * geometry shader; outputs the GS never reads are dropped (with a log
 * message) and still count as success. */
bool
VertexExportForGS::do_store_output(const store_loc& store_info,
                                   nir_intrinsic_instr& instr)
{
   int ring_offset = -1;
   auto out_io = m_parent->output(store_info.driver_location);

   sfn_log << SfnLog::io << "check output " << store_info.driver_location
           << " varying_slot=" << static_cast<int>(out_io.varying_slot()) << "\n";

   for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
      auto& in_io = m_gs_shader->input[k];
      sfn_log << SfnLog::io << "  against  " << k
              << " varying_slot=" << static_cast<int>(in_io.varying_slot) << "\n";

      if (in_io.varying_slot == out_io.varying_slot()) {
         ring_offset = in_io.ring_offset;
         break;
      }
   }

   /* The viewport index is only recorded in the shader state here — no ring
    * write is emitted for it (presumably handled by later stages; verify). */
   if (store_info.location == VARYING_SLOT_VIEWPORT) {
      m_vs_out_viewport = 1;
      m_vs_out_misc_write = 1;
      return true;
   }

   if (ring_offset == -1) {
      sfn_log << SfnLog::warn << "VS defines output at "
              << store_info.driver_location
              << " varying_slot=" << static_cast<int>(out_io.varying_slot())
              << " that is not consumed as GS input\n";
      return true;
   }

   /* Only the components the store actually provides are written (7 ==
    * channel unused). */
   RegisterVec4::Swizzle src_swz = {7, 7, 7, 7};
   for (int i = 0; i < 4; ++i)
      src_swz[i] = i < instr.num_components ? i : 7;

   auto value = m_parent->value_factory().temp_vec4(pin_chgr, src_swz);

   /* Copy into the ring staging register; only the last copy closes the
    * ALU group. */
   AluInstr *ir = nullptr;
   for (unsigned int i = 0; i < instr.num_components; ++i) {
      ir = new AluInstr(op1_mov,
                        value[i],
                        m_parent->value_factory().src(instr.src[store_info.data_loc], i),
                        AluInstr::write);
      m_parent->emit_instruction(ir);
   }
   if (ir)
      ir->set_alu_flag(alu_last_instr);

   /* ring_offset is in bytes, the ring write offset in dwords (>> 2). */
   m_parent->emit_instruction(new MemRingOutInstr(
      cf_mem_ring, MemRingOutInstr::mem_write, value, ring_offset >> 2, 4, nullptr));

   if (store_info.location == VARYING_SLOT_CLIP_DIST0 ||
       store_info.location == VARYING_SLOT_CLIP_DIST1)
      m_num_clip_dist += 4;

   return true;
}
639 
/* Nothing to finalize: the ring writes are emitted directly when the
 * outputs are stored in do_store_output(). */
void
VertexExportForGS::finalize()
{
}
644 
645 void
get_shader_info(r600_shader * sh_info) const646 VertexExportForGS::get_shader_info(r600_shader *sh_info) const
647 {
648    sh_info->vs_out_viewport = m_vs_out_viewport;
649    sh_info->vs_out_misc_write = m_vs_out_misc_write;
650    sh_info->vs_as_es = true;
651 }
652 
/* Export stage used when the VS runs as LS and feeds the tessellation
 * control shader; carries no extra state. */
VertexExportForTCS::VertexExportForTCS(VertexStageShader *parent):
    VertexExportStage(parent)
{
}
657 
/* Nothing to finalize for the VS-as-LS path; intentionally empty. */
void
VertexExportForTCS::finalize()
{
}
662 
/* Report the VS-as-LS state to the r600_shader info block. */
void
VertexExportForTCS::get_shader_info(r600_shader *sh_info) const
{
   sh_info->vs_as_ls = 1;
}
668 
/* No-op: the store is accepted and ignored — output handling for the
 * VS-as-LS path presumably happens elsewhere in the TCS lowering; verify
 * against the callers before relying on this. */
bool
VertexExportForTCS::do_store_output(const store_loc& store_info,
                                    nir_intrinsic_instr& intr)
{
   (void)store_info;
   (void)intr;
   return true;
}
677 
678 } // namespace r600
679