/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26
27 #include "sfn_shader_gs.h"
28
29 #include "sfn_debug.h"
30 #include "sfn_instr_fetch.h"
31
32 namespace r600 {
33
GeometryShader(const r600_shader_key & key)34 GeometryShader::GeometryShader(const r600_shader_key& key):
35 Shader("GS", key.gs.first_atomic_counter),
36 m_tri_strip_adj_fix(key.gs.tri_strip_adj_fix)
37 {
38 }
39
40 bool
do_scan_instruction(nir_instr * instr)41 GeometryShader::do_scan_instruction(nir_instr *instr)
42 {
43 if (instr->type != nir_instr_type_intrinsic)
44 return false;
45
46 nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
47
48 switch (ii->intrinsic) {
49 case nir_intrinsic_store_output:
50 return process_store_output(ii);
51 case nir_intrinsic_load_per_vertex_input:
52 return process_load_input(ii);
53 default:
54 return false;
55 }
56 }
57
58 bool
process_store_output(nir_intrinsic_instr * instr)59 GeometryShader::process_store_output(nir_intrinsic_instr *instr)
60 {
61 auto location = static_cast<gl_varying_slot>(nir_intrinsic_io_semantics(instr).location);
62 auto index = nir_src_as_const_value(instr->src[1]);
63 assert(index);
64
65 auto driver_location = nir_intrinsic_base(instr) + index->u32;
66
67 if (location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1 ||
68 (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31) ||
69 (location >= VARYING_SLOT_TEX0 && location <= VARYING_SLOT_TEX7) ||
70 location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1 ||
71 location == VARYING_SLOT_PNTC || location == VARYING_SLOT_CLIP_VERTEX ||
72 location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1 ||
73 location == VARYING_SLOT_PRIMITIVE_ID || location == VARYING_SLOT_POS ||
74 location == VARYING_SLOT_PSIZ || location == VARYING_SLOT_LAYER ||
75 location == VARYING_SLOT_VIEWPORT || location == VARYING_SLOT_FOGC) {
76
77 auto write_mask = nir_intrinsic_write_mask(instr);
78 ShaderOutput output(driver_location, write_mask, location);
79
80 if (nir_intrinsic_io_semantics(instr).no_varying)
81 output.set_no_varying(true);
82 if (nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
83 add_output(output);
84
85 if (location == VARYING_SLOT_VIEWPORT) {
86 m_out_viewport = true;
87 m_out_misc_write = true;
88 }
89
90 if (location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) {
91 auto write_mask = nir_intrinsic_write_mask(instr);
92 m_cc_dist_mask |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0));
93 m_clip_dist_write |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0));
94 }
95
96 if (m_noutputs <= driver_location &&
97 nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
98 m_noutputs = driver_location + 1;
99
100 return true;
101 }
102 return false;
103 }
104
105 bool
process_load_input(nir_intrinsic_instr * instr)106 GeometryShader::process_load_input(nir_intrinsic_instr *instr)
107 {
108 auto location = static_cast<gl_varying_slot>(nir_intrinsic_io_semantics(instr).location);
109 auto index = nir_src_as_const_value(instr->src[1]);
110 assert(index);
111
112 auto driver_location = nir_intrinsic_base(instr) + index->u32;
113
114 if (location == VARYING_SLOT_POS || location == VARYING_SLOT_PSIZ ||
115 location == VARYING_SLOT_FOGC || location == VARYING_SLOT_CLIP_VERTEX ||
116 location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1 ||
117 location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1 ||
118 location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1 ||
119 location == VARYING_SLOT_PNTC ||
120 (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31) ||
121 (location >= VARYING_SLOT_TEX0 && location <= VARYING_SLOT_TEX7)) {
122
123 uint64_t bit = 1ull << location;
124 if (!(bit & m_input_mask)) {
125 ShaderInput input(driver_location, location);
126 input.set_ring_offset(16 * driver_location);
127 add_input(input);
128 m_next_input_ring_offset += 16;
129 m_input_mask |= bit;
130 }
131 return true;
132 }
133 return false;
134 }
135
136 int
do_allocate_reserved_registers()137 GeometryShader::do_allocate_reserved_registers()
138 {
139 const int sel[6] = {0, 0, 0, 1, 1, 1};
140 const int chan[6] = {0, 1, 3, 0, 1, 2};
141
142 /* Reserve registers used by the shaders (should check how many
143 * components are actually used */
144 for (int i = 0; i < 6; ++i) {
145 m_per_vertex_offsets[i] = value_factory().allocate_pinned_register(sel[i], chan[i]);
146 }
147
148 m_primitive_id = value_factory().allocate_pinned_register(0, 2);
149 m_invocation_id = value_factory().allocate_pinned_register(1, 3);
150
151 value_factory().set_virtual_register_base(2);
152
153 auto zero = value_factory().inline_const(ALU_SRC_0, 0);
154
155 for (int i = 0; i < 4; ++i) {
156 m_export_base[i] = value_factory().temp_register(0, false);
157 emit_instruction(
158 new AluInstr(op1_mov, m_export_base[i], zero, AluInstr::last_write));
159 }
160
161 m_ring_item_sizes[0] = m_next_input_ring_offset;
162
163 /* GS thread with no output workaround - emit a cut at start of GS */
164 if (chip_class() == ISA_CC_R600) {
165 emit_instruction(new EmitVertexInstr(0, true));
166 start_new_block(0);
167 }
168
169 if (m_tri_strip_adj_fix)
170 emit_adj_fix();
171
172 return value_factory().next_register_index();
173 }
174
175 bool
process_stage_intrinsic(nir_intrinsic_instr * intr)176 GeometryShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
177 {
178 switch (intr->intrinsic) {
179 case nir_intrinsic_emit_vertex:
180 return emit_vertex(intr, false);
181 case nir_intrinsic_end_primitive:
182 return emit_vertex(intr, true);
183 case nir_intrinsic_load_primitive_id:
184 return emit_simple_mov(intr->def, 0, m_primitive_id);
185 case nir_intrinsic_load_invocation_id:
186 return emit_simple_mov(intr->def, 0, m_invocation_id);
187 case nir_intrinsic_load_per_vertex_input:
188 return emit_load_per_vertex_input(intr);
189 default:;
190 }
191 return false;
192 }
193
194 bool
emit_vertex(nir_intrinsic_instr * instr,bool cut)195 GeometryShader::emit_vertex(nir_intrinsic_instr *instr, bool cut)
196 {
197 int stream = nir_intrinsic_stream_id(instr);
198 assert(stream < 4);
199
200 auto cut_instr = new EmitVertexInstr(stream, cut);
201
202 for (auto v : m_streamout_data) {
203 if (stream == 0 || v.first != VARYING_SLOT_POS) {
204 v.second->patch_ring(stream, m_export_base[stream]);
205 cut_instr->add_required_instr(v.second);
206 emit_instruction(v.second);
207 } else
208 delete v.second;
209 }
210 m_streamout_data.clear();
211
212 emit_instruction(cut_instr);
213 start_new_block(0);
214
215 if (!cut) {
216 auto ir = new AluInstr(op2_add_int,
217 m_export_base[stream],
218 m_export_base[stream],
219 value_factory().literal(m_noutputs),
220 AluInstr::last_write);
221 emit_instruction(ir);
222 }
223
224 return true;
225 }
226
227 bool
store_output(nir_intrinsic_instr * instr)228 GeometryShader::store_output(nir_intrinsic_instr *instr)
229 {
230 if (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_CLIP_VERTEX)
231 return true;
232
233 auto location = nir_intrinsic_io_semantics(instr).location;
234 auto index = nir_src_as_const_value(instr->src[1]);
235 assert(index);
236 auto driver_location = nir_intrinsic_base(instr) + index->u32;
237
238 uint32_t write_mask = nir_intrinsic_write_mask(instr);
239 uint32_t shift = nir_intrinsic_component(instr);
240
241 RegisterVec4::Swizzle src_swz{7, 7, 7, 7};
242 for (unsigned i = shift; i < 4; ++i) {
243 src_swz[i] = (1 << i) & (write_mask << shift) ? i - shift : 7;
244 }
245
246 auto out_value = value_factory().src_vec4(instr->src[0], pin_free, src_swz);
247
248 AluInstr *ir = nullptr;
249 if (m_streamout_data[location]) {
250 const auto& value = m_streamout_data[location]->value();
251 auto tmp = value_factory().temp_vec4(pin_chgr);
252 for (unsigned i = 0; i < 4 - shift; ++i) {
253 if (!(write_mask & (1 << i)))
254 continue;
255 if (out_value[i + shift]->chan() < 4) {
256 ir = new AluInstr(op1_mov,
257 tmp[i + shift],
258 out_value[i + shift],
259 AluInstr::write);
260 } else if (value[i]->chan() < 4) {
261 ir = new AluInstr(op1_mov, tmp[i + shift], value[i], AluInstr::write);
262 } else
263 continue;
264 emit_instruction(ir);
265 }
266 ir->set_alu_flag(alu_last_instr);
267 m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
268 MemRingOutInstr::mem_write_ind,
269 tmp,
270 4 * driver_location,
271 instr->num_components,
272 m_export_base[0]);
273 } else {
274
275 sfn_log << SfnLog::io << "None-streamout ";
276 bool need_copy = shift != 0;
277 if (!need_copy) {
278 for (int i = 0; i < 4; ++i) {
279 if ((write_mask & (1 << i)) && (out_value[i]->chan() != i)) {
280 need_copy = true;
281 break;
282 }
283 }
284 }
285
286 if (need_copy) {
287 auto tmp = value_factory().temp_vec4(pin_chgr);
288 for (unsigned i = 0; i < 4 - shift; ++i) {
289 if (out_value[i]->chan() < 4) {
290 ir = new AluInstr(op1_mov, tmp[i], out_value[i], AluInstr::write);
291 emit_instruction(ir);
292 }
293 }
294 ir->set_alu_flag(alu_last_instr);
295 m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
296 MemRingOutInstr::mem_write_ind,
297 tmp,
298 4 * driver_location,
299 instr->num_components,
300 m_export_base[0]);
301 } else {
302 for (auto i = 0; i < 4; ++i)
303 out_value[i]->set_pin(pin_chgr);
304 m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
305 MemRingOutInstr::mem_write_ind,
306 out_value,
307 4 * driver_location,
308 instr->num_components,
309 m_export_base[0]);
310 }
311 }
312
313 return true;
314 }
315
316 bool
emit_load_per_vertex_input(nir_intrinsic_instr * instr)317 GeometryShader::emit_load_per_vertex_input(nir_intrinsic_instr *instr)
318 {
319 auto dest = value_factory().dest_vec4(instr->def, pin_group);
320
321 RegisterVec4::Swizzle dest_swz{7, 7, 7, 7};
322 for (unsigned i = 0; i < instr->def.num_components; ++i) {
323 dest_swz[i] = i + nir_intrinsic_component(instr);
324 }
325
326 auto literal_index = nir_src_as_const_value(instr->src[0]);
327
328 if (!literal_index) {
329 sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
330 return false;
331 }
332 assert(literal_index->u32 < 6);
333 assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
334
335 EVTXDataFormat fmt =
336 chip_class() >= ISA_CC_EVERGREEN ? fmt_invalid : fmt_32_32_32_32_float;
337
338 auto addr = m_per_vertex_offsets[literal_index->u32];
339 auto fetch = new LoadFromBuffer(dest,
340 dest_swz,
341 addr,
342 16 * nir_intrinsic_base(instr),
343 R600_GS_RING_CONST_BUFFER,
344 nullptr,
345 fmt);
346
347 if (chip_class() >= ISA_CC_EVERGREEN)
348 fetch->set_fetch_flag(FetchInstr::use_const_field);
349
350 fetch->set_num_format(vtx_nf_norm);
351 fetch->reset_fetch_flag(FetchInstr::format_comp_signed);
352
353 emit_instruction(fetch);
354 return true;
355 }
356
357 void
do_finalize()358 GeometryShader::do_finalize()
359 {
360 }
361
362 void
do_get_shader_info(r600_shader * sh_info)363 GeometryShader::do_get_shader_info(r600_shader *sh_info)
364 {
365 sh_info->processor_type = PIPE_SHADER_GEOMETRY;
366 sh_info->ring_item_sizes[0] = m_ring_item_sizes[0];
367 sh_info->cc_dist_mask = m_cc_dist_mask;
368 sh_info->clip_dist_write = m_clip_dist_write;
369 }
370
371 bool
read_prop(std::istream & is)372 GeometryShader::read_prop(std::istream& is)
373 {
374 (void)is;
375 return true;
376 }
377
378 void
do_print_properties(std::ostream & os) const379 GeometryShader::do_print_properties(std::ostream& os) const
380 {
381 (void)os;
382 }
383
384 void
emit_adj_fix()385 GeometryShader::emit_adj_fix()
386 {
387 auto adjhelp0 = value_factory().temp_register();
388
389 emit_instruction(new AluInstr(op2_and_int,
390 adjhelp0,
391 m_primitive_id,
392 value_factory().one_i(),
393 AluInstr::last_write));
394
395 int reg_indices[6];
396 int rotate_indices[6] = {4, 5, 0, 1, 2, 3};
397
398 reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel();
399 reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel();
400
401 std::array<PRegister, 6> adjhelp;
402
403 AluInstr *ir = nullptr;
404 for (int i = 0; i < 6; i++) {
405 adjhelp[i] = value_factory().temp_register();
406 ir = new AluInstr(op3_cnde_int,
407 adjhelp[i],
408 adjhelp0,
409 m_per_vertex_offsets[i],
410 m_per_vertex_offsets[rotate_indices[i]],
411 AluInstr::write);
412
413 emit_instruction(ir);
414 }
415 ir->set_alu_flag(alu_last_instr);
416
417 for (int i = 0; i < 6; i++)
418 m_per_vertex_offsets[i] = adjhelp[i];
419 }
420
421 } // namespace r600
422