• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_shader_gs.h"
28 
29 #include "sfn_debug.h"
30 #include "sfn_instr_fetch.h"
31 
32 namespace r600 {
33 
/* Construct the geometry shader state.
 *
 * \param key shader key: supplies the index of the first atomic counter
 *        (forwarded to the Shader base class) and the flag requesting the
 *        triangle-strip-with-adjacency rotation workaround (applied later
 *        in do_allocate_reserved_registers via emit_adj_fix).
 */
GeometryShader::GeometryShader(const r600_shader_key& key):
    Shader("GS", key.gs.first_atomic_counter),
    m_tri_strip_adj_fix(key.gs.tri_strip_adj_fix)
{
}
39 
40 bool
do_scan_instruction(nir_instr * instr)41 GeometryShader::do_scan_instruction(nir_instr *instr)
42 {
43    if (instr->type != nir_instr_type_intrinsic)
44       return false;
45 
46    nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
47 
48    switch (ii->intrinsic) {
49    case nir_intrinsic_store_output:
50       return process_store_output(ii);
51    case nir_intrinsic_load_per_vertex_input:
52       return process_load_input(ii);
53    default:
54       return false;
55    }
56 }
57 
58 bool
process_store_output(nir_intrinsic_instr * instr)59 GeometryShader::process_store_output(nir_intrinsic_instr *instr)
60 {
61    auto location = static_cast<gl_varying_slot>(nir_intrinsic_io_semantics(instr).location);
62    auto index = nir_src_as_const_value(instr->src[1]);
63    assert(index);
64 
65    auto driver_location = nir_intrinsic_base(instr) + index->u32;
66 
67    if (location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1 ||
68        (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31) ||
69        (location >= VARYING_SLOT_TEX0 && location <= VARYING_SLOT_TEX7) ||
70        location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1 ||
71        location == VARYING_SLOT_PNTC || location == VARYING_SLOT_CLIP_VERTEX ||
72        location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1 ||
73        location == VARYING_SLOT_PRIMITIVE_ID || location == VARYING_SLOT_POS ||
74        location == VARYING_SLOT_PSIZ || location == VARYING_SLOT_LAYER ||
75        location == VARYING_SLOT_VIEWPORT || location == VARYING_SLOT_FOGC) {
76 
77       auto write_mask = nir_intrinsic_write_mask(instr);
78       ShaderOutput output(driver_location, write_mask, location);
79 
80       if (nir_intrinsic_io_semantics(instr).no_varying)
81          output.set_no_varying(true);
82       if (nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
83          add_output(output);
84 
85       if (location == VARYING_SLOT_VIEWPORT) {
86          m_out_viewport = true;
87          m_out_misc_write = true;
88       }
89 
90       if (location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) {
91          auto write_mask = nir_intrinsic_write_mask(instr);
92          m_cc_dist_mask |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0));
93          m_clip_dist_write |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0));
94       }
95 
96       if (m_noutputs <= driver_location &&
97           nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
98          m_noutputs = driver_location + 1;
99 
100       return true;
101    }
102    return false;
103 }
104 
105 bool
process_load_input(nir_intrinsic_instr * instr)106 GeometryShader::process_load_input(nir_intrinsic_instr *instr)
107 {
108    auto location = static_cast<gl_varying_slot>(nir_intrinsic_io_semantics(instr).location);
109    auto index = nir_src_as_const_value(instr->src[1]);
110    assert(index);
111 
112    auto driver_location = nir_intrinsic_base(instr) + index->u32;
113 
114    if (location == VARYING_SLOT_POS || location == VARYING_SLOT_PSIZ ||
115        location == VARYING_SLOT_FOGC || location == VARYING_SLOT_CLIP_VERTEX ||
116        location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1 ||
117        location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1 ||
118        location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1 ||
119        location == VARYING_SLOT_PNTC ||
120        (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31) ||
121        (location >= VARYING_SLOT_TEX0 && location <= VARYING_SLOT_TEX7)) {
122 
123       uint64_t bit = 1ull << location;
124       if (!(bit & m_input_mask)) {
125          ShaderInput input(driver_location, location);
126          input.set_ring_offset(16 * driver_location);
127          add_input(input);
128          m_next_input_ring_offset += 16;
129          m_input_mask |= bit;
130       }
131       return true;
132    }
133    return false;
134 }
135 
/* Allocate the registers the hardware pre-loads for a geometry shader and
 * emit the setup code that must run before the shader body.
 *
 * Returns the first register index available for general allocation.
 */
int
GeometryShader::do_allocate_reserved_registers()
{
   /* The six per-vertex ring offsets live in fixed locations:
    * R0.x, R0.y, R0.w and R1.x, R1.y, R1.z (sel = register, chan = channel) */
   const int sel[6] = {0, 0, 0, 1, 1, 1};
   const int chan[6] = {0, 1, 3, 0, 1, 2};

   /* Reserve registers used by the shaders (should check how many
    * components are actually used */
   for (int i = 0; i < 6; ++i) {
      m_per_vertex_offsets[i] = value_factory().allocate_pinned_register(sel[i], chan[i]);
   }

   /* Primitive ID is pre-loaded in R0.z, invocation ID in R1.w */
   m_primitive_id = value_factory().allocate_pinned_register(0, 2);
   m_invocation_id = value_factory().allocate_pinned_register(1, 3);

   /* R0 and R1 are taken; virtual registers start at index 2 */
   value_factory().set_virtual_register_base(2);

   auto zero = value_factory().inline_const(ALU_SRC_0, 0);

   /* One ring-export base address per stream, all starting at zero;
    * emit_vertex advances them by m_noutputs per emitted vertex */
   for (int i = 0; i < 4; ++i) {
      m_export_base[i] = value_factory().temp_register(0, false);
      emit_instruction(
         new AluInstr(op1_mov, m_export_base[i], zero, AluInstr::last_write));
   }

   m_ring_item_sizes[0] = m_next_input_ring_offset;

   /* GS thread with no output workaround - emit a cut at start of GS */
   if (chip_class() == ISA_CC_R600) {
      emit_instruction(new EmitVertexInstr(0, true));
      start_new_block(0);
   }

   /* Rotate the per-vertex offsets for triangle strips with adjacency
    * when the shader key requests it */
   if (m_tri_strip_adj_fix)
      emit_adj_fix();

   return value_factory().next_register_index();
}
174 
175 bool
process_stage_intrinsic(nir_intrinsic_instr * intr)176 GeometryShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
177 {
178    switch (intr->intrinsic) {
179    case nir_intrinsic_emit_vertex:
180       return emit_vertex(intr, false);
181    case nir_intrinsic_end_primitive:
182       return emit_vertex(intr, true);
183    case nir_intrinsic_load_primitive_id:
184       return emit_simple_mov(intr->def, 0, m_primitive_id);
185    case nir_intrinsic_load_invocation_id:
186       return emit_simple_mov(intr->def, 0, m_invocation_id);
187    case nir_intrinsic_load_per_vertex_input:
188       return emit_load_per_vertex_input(intr);
189    default:;
190    }
191    return false;
192 }
193 
194 bool
emit_vertex(nir_intrinsic_instr * instr,bool cut)195 GeometryShader::emit_vertex(nir_intrinsic_instr *instr, bool cut)
196 {
197    int stream = nir_intrinsic_stream_id(instr);
198    assert(stream < 4);
199 
200    auto cut_instr = new EmitVertexInstr(stream, cut);
201 
202    for (auto v : m_streamout_data) {
203       if (stream == 0 || v.first != VARYING_SLOT_POS) {
204          v.second->patch_ring(stream, m_export_base[stream]);
205          cut_instr->add_required_instr(v.second);
206          emit_instruction(v.second);
207       } else
208          delete v.second;
209    }
210    m_streamout_data.clear();
211 
212    emit_instruction(cut_instr);
213    start_new_block(0);
214 
215    if (!cut) {
216       auto ir = new AluInstr(op2_add_int,
217                              m_export_base[stream],
218                              m_export_base[stream],
219                              value_factory().literal(m_noutputs),
220                              AluInstr::last_write);
221       emit_instruction(ir);
222    }
223 
224    return true;
225 }
226 
227 bool
store_output(nir_intrinsic_instr * instr)228 GeometryShader::store_output(nir_intrinsic_instr *instr)
229 {
230    if (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_CLIP_VERTEX)
231       return true;
232 
233    auto location = nir_intrinsic_io_semantics(instr).location;
234    auto index = nir_src_as_const_value(instr->src[1]);
235    assert(index);
236    auto driver_location = nir_intrinsic_base(instr) + index->u32;
237 
238    uint32_t write_mask = nir_intrinsic_write_mask(instr);
239    uint32_t shift = nir_intrinsic_component(instr);
240 
241    RegisterVec4::Swizzle src_swz{7, 7, 7, 7};
242    for (unsigned i = shift; i < 4; ++i) {
243       src_swz[i] = (1 << i) & (write_mask << shift) ? i - shift : 7;
244    }
245 
246    auto out_value = value_factory().src_vec4(instr->src[0], pin_free, src_swz);
247 
248    AluInstr *ir = nullptr;
249    if (m_streamout_data[location]) {
250       const auto& value = m_streamout_data[location]->value();
251       auto tmp = value_factory().temp_vec4(pin_chgr);
252       for (unsigned i = 0; i < 4 - shift; ++i) {
253          if (!(write_mask & (1 << i)))
254             continue;
255          if (out_value[i + shift]->chan() < 4) {
256             ir = new AluInstr(op1_mov,
257                               tmp[i + shift],
258                               out_value[i + shift],
259                               AluInstr::write);
260          } else if (value[i]->chan() < 4) {
261             ir = new AluInstr(op1_mov, tmp[i + shift], value[i], AluInstr::write);
262          } else
263             continue;
264          emit_instruction(ir);
265       }
266       ir->set_alu_flag(alu_last_instr);
267       m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
268                                                        MemRingOutInstr::mem_write_ind,
269                                                        tmp,
270                                                        4 * driver_location,
271                                                        instr->num_components,
272                                                        m_export_base[0]);
273    } else {
274 
275       sfn_log << SfnLog::io << "None-streamout ";
276       bool need_copy = shift != 0;
277       if (!need_copy) {
278          for (int i = 0; i < 4; ++i) {
279             if ((write_mask & (1 << i)) && (out_value[i]->chan() != i)) {
280                need_copy = true;
281                break;
282             }
283          }
284       }
285 
286       if (need_copy) {
287          auto tmp = value_factory().temp_vec4(pin_chgr);
288          for (unsigned i = 0; i < 4 - shift; ++i) {
289             if (out_value[i]->chan() < 4) {
290                ir = new AluInstr(op1_mov, tmp[i], out_value[i], AluInstr::write);
291                emit_instruction(ir);
292             }
293          }
294          ir->set_alu_flag(alu_last_instr);
295          m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
296                                                           MemRingOutInstr::mem_write_ind,
297                                                           tmp,
298                                                           4 * driver_location,
299                                                           instr->num_components,
300                                                           m_export_base[0]);
301       } else {
302          for (auto i = 0; i < 4; ++i)
303             out_value[i]->set_pin(pin_chgr);
304          m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
305                                                           MemRingOutInstr::mem_write_ind,
306                                                           out_value,
307                                                           4 * driver_location,
308                                                           instr->num_components,
309                                                           m_export_base[0]);
310       }
311    }
312 
313    return true;
314 }
315 
/* Emit the vertex-ring fetch for a per-vertex GS input.
 *
 * Only constant vertex indices are supported; indirect addressing makes
 * the translation fail. Returns true on success.
 */
bool
GeometryShader::emit_load_per_vertex_input(nir_intrinsic_instr *instr)
{
   auto dest = value_factory().dest_vec4(instr->def, pin_group);

   /* Route the fetched components to the destination channels starting at
    * the intrinsic's component offset; 7 marks an unused channel */
   RegisterVec4::Swizzle dest_swz{7, 7, 7, 7};
   for (unsigned i = 0; i < instr->def.num_components; ++i) {
      dest_swz[i] = i + nir_intrinsic_component(instr);
   }

   /* src[0] is the vertex index within the input primitive */
   auto literal_index = nir_src_as_const_value(instr->src[0]);

   if (!literal_index) {
      sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
      return false;
   }
   /* at most six vertices (triangle with adjacency) */
   assert(literal_index->u32 < 6);
   assert(nir_intrinsic_io_semantics(instr).num_slots == 1);

   /* On Evergreen+ the data format comes from the buffer constants (see
    * use_const_field below); older chips need it spelled out */
   EVTXDataFormat fmt =
      chip_class() >= ISA_CC_EVERGREEN ? fmt_invalid : fmt_32_32_32_32_float;

   auto addr = m_per_vertex_offsets[literal_index->u32];
   auto fetch = new LoadFromBuffer(dest,
                                   dest_swz,
                                   addr,
                                   16 * nir_intrinsic_base(instr),
                                   R600_GS_RING_CONST_BUFFER,
                                   nullptr,
                                   fmt);

   if (chip_class() >= ISA_CC_EVERGREEN)
      fetch->set_fetch_flag(FetchInstr::use_const_field);

   fetch->set_num_format(vtx_nf_norm);
   fetch->reset_fetch_flag(FetchInstr::format_comp_signed);

   emit_instruction(fetch);
   return true;
}
356 
/* Stage specific finalization hook - nothing to do for geometry shaders */
void
GeometryShader::do_finalize()
{
}
361 
362 void
do_get_shader_info(r600_shader * sh_info)363 GeometryShader::do_get_shader_info(r600_shader *sh_info)
364 {
365    sh_info->processor_type = PIPE_SHADER_GEOMETRY;
366    sh_info->ring_item_sizes[0] = m_ring_item_sizes[0];
367    sh_info->cc_dist_mask = m_cc_dist_mask;
368    sh_info->clip_dist_write = m_clip_dist_write;
369 }
370 
371 bool
read_prop(std::istream & is)372 GeometryShader::read_prop(std::istream& is)
373 {
374    (void)is;
375    return true;
376 }
377 
378 void
do_print_properties(std::ostream & os) const379 GeometryShader::do_print_properties(std::ostream& os) const
380 {
381    (void)os;
382 }
383 
/* Emit the triangle-strip-with-adjacency workaround.
 *
 * Depending on the parity of the primitive ID the six per-vertex ring
 * offsets are either kept as loaded or replaced by a rotated set
 * (4, 5, 0, 1, 2, 3), selected per element with a conditional move.
 * NOTE(review): presumably this compensates for the vertex order the
 * hardware delivers for odd strip-adjacency primitives - confirm against
 * the hardware documentation.
 */
void
GeometryShader::emit_adj_fix()
{
   /* adjhelp0 = primitive_id & 1: selector between the two orderings */
   auto adjhelp0 = value_factory().temp_register();

   emit_instruction(new AluInstr(op2_and_int,
                                 adjhelp0,
                                 m_primitive_id,
                                 value_factory().one_i(),
                                 AluInstr::last_write));

   int reg_indices[6];
   int rotate_indices[6] = {4, 5, 0, 1, 2, 3};

   reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel();
   reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel();

   std::array<PRegister, 6> adjhelp;

   /* adjhelp[i] = adjhelp0 == 0 ? offsets[i] : offsets[rotate[i]] */
   AluInstr *ir = nullptr;
   for (int i = 0; i < 6; i++) {
      adjhelp[i] = value_factory().temp_register();
      ir = new AluInstr(op3_cnde_int,
                        adjhelp[i],
                        adjhelp0,
                        m_per_vertex_offsets[i],
                        m_per_vertex_offsets[rotate_indices[i]],
                        AluInstr::write);

      emit_instruction(ir);
   }
   /* close the ALU group after the last conditional move */
   ir->set_alu_flag(alu_last_instr);

   /* from here on all fetches use the (possibly rotated) offsets */
   for (int i = 0; i < 6; i++)
      m_per_vertex_offsets[i] = adjhelp[i];
}
420 
421 } // namespace r600
422