/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "compiler/nir/nir_builder.h"
/**
 * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its I/O
 * intrinsics into something amenable to the V3D architecture.
 *
 * Most of the work is turning the VS's store_output intrinsics from working
 * on a base representing the gallium-level vec4 driver_location to an offset
 * within the VPM, and emitting the header that's read by the fixed function
 * hardware between the VS and FS.
 *
 * We also adjust the offsets on uniform loads to be in bytes, since that's
 * what we need for indirect addressing with general TMU access.
 */
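
/* As a rough sketch (slot numbers are illustrative and the NIR syntax is
 * abridged), a VS store of a vec4 varying at gallium driver_location 1:
 *
 *    store_output(color, 0) (base=1, wrmask=xyzw)
 *
 * becomes one scalar store per component, each with its base rewritten to
 * that component's VPM slot, e.g. with varyings_vpm_offset == 7:
 *
 *    store_output(color.x, 0) (base=7, wrmask=x)
 *    store_output(color.y, 0) (base=8, wrmask=x)
 *    ...
 *
 * with the fixed-function header slots (screen-space XY, Zs, 1/Wc, and
 * optionally point size) written separately at the end of the VS, or at
 * each EmitVertex() for a GS.
 */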

struct v3d_nir_lower_io_state {
        int pos_vpm_offset;
        int vp_vpm_offset;
        int zs_vpm_offset;
        int rcp_wc_vpm_offset;
        int psiz_vpm_offset;
        int varyings_vpm_offset;

        /* Geometry shader state */
        struct {
                /* VPM offset for the current vertex data output */
                nir_variable *output_offset_var;
                /* VPM offset for the current vertex header */
                nir_variable *header_offset_var;
                /* VPM header for the current vertex */
                nir_variable *header_var;

                /* Size of the complete VPM output header */
                uint32_t output_header_size;
                /* Size of the output data for a single vertex */
                uint32_t output_vertex_data_size;
        } gs;

        BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];

        nir_def *pos[4];
};

static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state);
static void
v3d_nir_store_output(nir_builder *b, int base, nir_def *offset,
                     nir_def *chan)
{
        if (offset) {
                /* When generating the VIR instruction, the base and the
                 * offset are just going to get added together with an ADD
                 * instruction so we might as well do the add here at the
                 * NIR level instead and let the constant folding do its
                 * magic.
                 */
                offset = nir_iadd_imm(b, offset, base);
                base = 0;
        } else {
                offset = nir_imm_int(b, 0);
        }

        nir_store_output(b, chan, offset,
                         .base = base, .write_mask = 0x1, .component = 0);
}

static int
v3d_varying_slot_vpm_offset(struct v3d_compile *c, unsigned location,
                            unsigned component)
{
        uint32_t num_used_outputs = 0;
        struct v3d_varying_slot *used_outputs = NULL;
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                num_used_outputs = c->vs_key->num_used_outputs;
                used_outputs = c->vs_key->used_outputs;
                break;
        case MESA_SHADER_GEOMETRY:
                num_used_outputs = c->gs_key->num_used_outputs;
                used_outputs = c->gs_key->used_outputs;
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        for (int i = 0; i < num_used_outputs; i++) {
                struct v3d_varying_slot slot = used_outputs[i];

                if (v3d_slot_get_slot(slot) == location &&
                    v3d_slot_get_component(slot) == component) {
                        return i;
                }
        }

        return -1;
}

/* Lowers a store_output(gallium driver location) to a series of store_outputs
 * with a driver_location equal to the offset in the VPM.
 *
 * For geometry shaders we need to emit multiple vertices so the VPM offsets
 * need to be computed in the shader code based on the current vertex index.
 */
static void
v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                         nir_intrinsic_instr *intr,
                         struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&intr->instr);

        /* If this is a geometry shader we need to emit our outputs
         * to the current vertex offset in the VPM.
         */
        nir_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                        nir_load_var(b, state->gs.output_offset_var) : NULL;

        int start_comp = nir_intrinsic_component(intr);
        unsigned location = nir_intrinsic_io_semantics(intr).location;
        nir_def *src = intr->src[0].ssa;

        /* Save off the components of the position for the setup of VPM inputs
         * read by fixed function HW.
         */
        if (location == VARYING_SLOT_POS) {
                for (int i = 0; i < intr->num_components; i++) {
                        state->pos[start_comp + i] = nir_channel(b, src, i);
                }
        }

        /* For now, we just write psiz to its slot in the FF header. */
        if (location == VARYING_SLOT_PSIZ &&
            state->psiz_vpm_offset != -1) {
                v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg,
                                     src);
        }

        if (location == VARYING_SLOT_LAYER) {
                assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
                nir_def *header = nir_load_var(b, state->gs.header_var);
                header = nir_iand_imm(b, header, 0xff00ffff);

                /* From the GLES 3.2 spec:
                 *
                 *   "When fragments are written to a layered framebuffer, the
                 *    fragment’s layer number selects an image from the array
                 *    of images at each attachment (...). If the fragment’s
                 *    layer number is negative, or greater than or equal to
                 *    the minimum number of layers of any attachment, the
                 *    effects of the fragment on the framebuffer contents are
                 *    undefined."
                 *
                 * This suggests we can just ignore that situation. However,
                 * for V3D an out-of-bounds layer index means that the binner
                 * might do out-of-bounds writes to the tile state. The
                 * simulator has an assert to catch this, so we play it safe
                 * here and make sure that doesn't happen by setting gl_Layer
                 * to 0 in that case (we always allocate tile state for at
                 * least one layer).
                 */
                nir_def *fb_layers = nir_load_fb_layers_v3d(b, 32);
                nir_def *cond = nir_ige(b, src, fb_layers);
                nir_def *layer_id =
                        nir_bcsel(b, cond,
                                  nir_imm_int(b, 0),
                                  nir_ishl_imm(b, src, 16));
                header = nir_ior(b, header, layer_id);
                nir_store_var(b, state->gs.header_var, header, 0x1);
        }

        /* Scalarize outputs if it hasn't happened already, since we want to
         * schedule each VPM write individually. We can skip any output
         * components not read by the FS.
         */
        for (int i = 0; i < intr->num_components; i++) {
                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c, location,
                                                    start_comp + i);

                if (!(nir_intrinsic_write_mask(intr) & (1 << i)))
                        continue;

                if (vpm_offset == -1)
                        continue;

                if (nir_src_is_const(intr->src[1]))
                        vpm_offset += nir_src_as_uint(intr->src[1]) * 4;

                BITSET_SET(state->varyings_stored, vpm_offset);

                v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
                                     offset_reg, nir_channel(b, src, i));
        }

        nir_instr_remove(&intr->instr);
}

static inline void
reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
{
        const uint8_t NEW_PRIMITIVE_OFFSET = 0;
        const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;

        uint32_t vertex_data_size = state->gs.output_vertex_data_size;
        assert((vertex_data_size & 0xffffff00) == 0);

        uint32_t header;
        header = 1 << NEW_PRIMITIVE_OFFSET;
        header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
        nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
}
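
/* A sketch of the per-vertex GS VPM header word as this pass builds it
 * (derived from the shifts and masks used above and in the gl_Layer
 * handling below; not an exhaustive description of the hardware format):
 *
 *    bit  0      : new primitive flag (cleared after each EmitVertex())
 *    bits 8..15  : vertex data length, in VPM slots
 *    bits 16..23 : gl_Layer written by the shader, if any
 */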

static void
v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
                          nir_intrinsic_instr *instr,
                          struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&instr->instr);

        nir_def *header = nir_load_var(b, state->gs.header_var);
        nir_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
        nir_def *output_offset = nir_load_var(b, state->gs.output_offset_var);

        /* Emit fixed function outputs */
        v3d_nir_emit_ff_vpm_outputs(c, b, state);

        /* Emit vertex header */
        v3d_nir_store_output(b, 0, header_offset, header);

        /* Update VPM offset for next vertex output data and header */
        output_offset =
                nir_iadd_imm(b, output_offset,
                             state->gs.output_vertex_data_size);

        header_offset = nir_iadd_imm(b, header_offset, 1);

        /* Reset the New Primitive bit */
        header = nir_iand_imm(b, header, 0xfffffffe);

        nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
        nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
        nir_store_var(b, state->gs.header_var, header, 0x1);

        nir_instr_remove(&instr->instr);
}

static void
v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
                            nir_intrinsic_instr *instr,
                            struct v3d_nir_lower_io_state *state)
{
        assert(state->gs.header_var);
        b->cursor = nir_before_instr(&instr->instr);
        reset_gs_header(b, state);

        nir_instr_remove(&instr->instr);
}

/* Some vertex attribute formats may require applying a swizzle, but the
 * hardware doesn't provide a means to do that, so we need to apply the
 * swizzle in the vertex shader.
 *
 * This is required at least in Vulkan to support the mandatory vertex
 * attribute format VK_FORMAT_B8G8R8A8_UNORM.
 */
static void
v3d_nir_lower_vertex_input(struct v3d_compile *c, nir_builder *b,
                           nir_intrinsic_instr *instr)
{
        assert(c->s->info.stage == MESA_SHADER_VERTEX);

        if (!c->vs_key->va_swap_rb_mask)
                return;

        const uint32_t location = nir_intrinsic_io_semantics(instr).location;

        if (!(c->vs_key->va_swap_rb_mask & (1 << location)))
                return;

        /* Swap the R and B channels: component 0 becomes 2 and vice versa. */
        assert(instr->num_components == 1);
        const uint32_t comp = nir_intrinsic_component(instr);
        if (comp == 0 || comp == 2)
                nir_intrinsic_set_component(instr, (comp + 2) % 4);
}

static void
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
                       struct nir_instr *instr,
                       struct v3d_nir_lower_io_state *state)
{
        if (instr->type != nir_instr_type_intrinsic)
                return;
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        switch (intr->intrinsic) {
        case nir_intrinsic_load_input:
                if (c->s->info.stage == MESA_SHADER_VERTEX)
                        v3d_nir_lower_vertex_input(c, b, intr);
                break;

        case nir_intrinsic_store_output:
                if (c->s->info.stage == MESA_SHADER_VERTEX ||
                    c->s->info.stage == MESA_SHADER_GEOMETRY) {
                        v3d_nir_lower_vpm_output(c, b, intr, state);
                }
                break;

        case nir_intrinsic_emit_vertex:
                v3d_nir_lower_emit_vertex(c, b, intr, state);
                break;

        case nir_intrinsic_end_primitive:
                v3d_nir_lower_end_primitive(c, b, intr, state);
                break;

        default:
                break;
        }
}

/* Remap the output var's .driver_location. This is purely for
 * nir_print_shader() so that store_output can map back to a variable name.
 */
static void
v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
                                        struct v3d_nir_lower_io_state *state)
{
        nir_foreach_shader_out_variable_safe(var, c->s) {
                if (var->data.location == VARYING_SLOT_POS &&
                    state->pos_vpm_offset != -1) {
                        var->data.driver_location = state->pos_vpm_offset;
                        continue;
                }

                if (var->data.location == VARYING_SLOT_PSIZ &&
                    state->psiz_vpm_offset != -1) {
                        var->data.driver_location = state->psiz_vpm_offset;
                        continue;
                }

                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c,
                                                    var->data.location,
                                                    var->data.location_frac);
                if (vpm_offset != -1) {
                        var->data.driver_location =
                                state->varyings_vpm_offset + vpm_offset;
                } else {
                        /* If we couldn't find a mapping for the var, delete
                         * it so that its old .driver_location doesn't confuse
                         * nir_print_shader().
                         */
                        exec_node_remove(&var->node);
                }
        }
}

static void
v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        uint32_t vpm_offset = 0;

        state->pos_vpm_offset = -1;
        state->vp_vpm_offset = -1;
        state->zs_vpm_offset = -1;
        state->rcp_wc_vpm_offset = -1;
        state->psiz_vpm_offset = -1;

        bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
        if (needs_ff_outputs) {
                if (c->vs_key->is_coord) {
                        state->pos_vpm_offset = vpm_offset;
                        vpm_offset += 4;
                }

                state->vp_vpm_offset = vpm_offset;
                vpm_offset += 2;

                if (!c->vs_key->is_coord) {
                        state->zs_vpm_offset = vpm_offset++;
                        state->rcp_wc_vpm_offset = vpm_offset++;
                }

                if (c->vs_key->per_vertex_point_size)
                        state->psiz_vpm_offset = vpm_offset++;
        }

        state->varyings_vpm_offset = vpm_offset;

        c->vpm_output_size = MAX2(1, vpm_offset + c->vs_key->num_used_outputs);
}
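
/* For illustration, a render VS that is the last geometry stage
 * (is_coord == false) and writes per-vertex point size would get this
 * layout from the code above (slot numbers are an example, not a hardware
 * contract):
 *
 *    slots 0..1 : screen-space Xs/Ys        (vp_vpm_offset)
 *    slot  2    : Zs                        (zs_vpm_offset)
 *    slot  3    : 1/Wc                      (rcp_wc_vpm_offset)
 *    slot  4    : point size                (psiz_vpm_offset)
 *    slots 5... : one slot per used output  (varyings_vpm_offset)
 */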

static void
v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        /* 1 header slot for number of output vertices */
        uint32_t vpm_offset = 1;

        /* 1 header slot per output vertex */
        const uint32_t num_vertices = c->s->info.gs.vertices_out;
        vpm_offset += num_vertices;

        state->gs.output_header_size = vpm_offset;

        /* Vertex data: here we only compute offsets within a generic
         * vertex's data. When it is time to actually write a particular
         * vertex to the VPM, we will add that vertex's base offset in the
         * VPM output to these offsets.
         *
         * If a geometry shader is present, it is always the last shader
         * stage before rasterization, so we always emit fixed function
         * outputs.
         */
        vpm_offset = 0;
        if (c->gs_key->is_coord) {
                state->pos_vpm_offset = vpm_offset;
                vpm_offset += 4;
        } else {
                state->pos_vpm_offset = -1;
        }

        state->vp_vpm_offset = vpm_offset;
        vpm_offset += 2;

        if (!c->gs_key->is_coord) {
                state->zs_vpm_offset = vpm_offset++;
                state->rcp_wc_vpm_offset = vpm_offset++;
        } else {
                state->zs_vpm_offset = -1;
                state->rcp_wc_vpm_offset = -1;
        }

        /* Mesa enables OES_geometry_shader_point_size automatically with
         * OES_geometry_shader so we always need to handle point size
         * writes if present.
         */
        if (c->gs_key->per_vertex_point_size)
                state->psiz_vpm_offset = vpm_offset++;
        else
                state->psiz_vpm_offset = -1;

        state->varyings_vpm_offset = vpm_offset;

        state->gs.output_vertex_data_size =
                state->varyings_vpm_offset + c->gs_key->num_used_outputs;

        c->vpm_output_size =
                state->gs.output_header_size +
                state->gs.output_vertex_data_size * num_vertices;
}
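
/* As a sketch (numbers are illustrative): a GS with vertices_out == 3,
 * is_coord == false, no per-vertex point size and two used output slots
 * would get output_header_size == 4 and output_vertex_data_size == 6:
 *
 *    slot 0     : global header (vertex count, header size)
 *    slots 1..3 : one per-vertex header slot per output vertex
 *    then, per vertex (6 slots each, starting at its VPM output offset):
 *       +0..+1 Xs/Ys, +2 Zs, +3 1/Wc, +4..+5 varyings
 */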

static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state)
{
        /* If this is a geometry shader we need to emit our fixed function
         * outputs to the current vertex offset in the VPM.
         */
        nir_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                        nir_load_var(b, state->gs.output_offset_var) : NULL;

        for (int i = 0; i < 4; i++) {
                if (!state->pos[i])
                        state->pos[i] = nir_undef(b, 1, 32);
        }

        nir_def *rcp_wc = nir_frcp(b, state->pos[3]);

        if (state->pos_vpm_offset != -1) {
                for (int i = 0; i < 4; i++) {
                        v3d_nir_store_output(b, state->pos_vpm_offset + i,
                                             offset_reg, state->pos[i]);
                }
        }

        if (state->vp_vpm_offset != -1) {
                for (int i = 0; i < 2; i++) {
                        nir_def *pos;
                        nir_def *scale;
                        pos = state->pos[i];
                        if (i == 0)
                                scale = nir_load_viewport_x_scale(b);
                        else
                                scale = nir_load_viewport_y_scale(b);
                        pos = nir_fmul(b, pos, scale);
                        pos = nir_fmul(b, pos, rcp_wc);
                        /* Pre-V3D 4.3 hardware has a quirk where it expects
                         * XY coordinates in .8 fixed-point format, but then
                         * internally rounds them to .6 fixed-point,
                         * introducing a double rounding. The double rounding
                         * can cause very slight differences in triangle
                         * rasterization coverage that can actually be
                         * noticed by some CTS tests.
                         *
                         * The correct fix for this, as recommended by
                         * Broadcom, is to convert to .8 fixed-point with
                         * ffloor().
                         */
                        if (c->devinfo->ver == 42)
                                pos = nir_f2i32(b, nir_ffloor(b, pos));
                        else
                                pos = nir_f2i32(b, nir_fround_even(b, pos));

                        v3d_nir_store_output(b, state->vp_vpm_offset + i,
                                             offset_reg, pos);
                }
        }

        if (state->zs_vpm_offset != -1) {
                nir_def *z = state->pos[2];
                z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
                z = nir_fmul(b, z, rcp_wc);
                z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
                v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
        }

        if (state->rcp_wc_vpm_offset != -1) {
                v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
                                     offset_reg, rcp_wc);
        }

        /* Store 0 to varyings requested by the FS but not stored by the
         * previous stage. This should be undefined behavior, but
         * glsl-routing seems to rely on it.
         */
        uint32_t num_used_outputs;
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                num_used_outputs = c->vs_key->num_used_outputs;
                break;
        case MESA_SHADER_GEOMETRY:
                num_used_outputs = c->gs_key->num_used_outputs;
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        for (int i = 0; i < num_used_outputs; i++) {
                if (!BITSET_TEST(state->varyings_stored, i)) {
                        v3d_nir_store_output(b, state->varyings_vpm_offset + i,
                                             offset_reg, nir_imm_int(b, 0));
                }
        }
}

static void
emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
               nir_function_impl *impl,
               struct v3d_nir_lower_io_state *state)
{
        nir_block *first = nir_start_block(impl);
        b->cursor = nir_before_block(first);

        const struct glsl_type *uint_type = glsl_uint_type();

        assert(!state->gs.output_offset_var);
        state->gs.output_offset_var =
                nir_local_variable_create(impl, uint_type, "output_offset");
        nir_store_var(b, state->gs.output_offset_var,
                      nir_imm_int(b, state->gs.output_header_size), 0x1);

        assert(!state->gs.header_offset_var);
        state->gs.header_offset_var =
                nir_local_variable_create(impl, uint_type, "header_offset");
        nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);

        assert(!state->gs.header_var);
        state->gs.header_var =
                nir_local_variable_create(impl, uint_type, "header");
        reset_gs_header(b, state);
}

static void
emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
                                 struct v3d_nir_lower_io_state *state)
{
        const uint8_t VERTEX_COUNT_OFFSET = 16;

        /* Our GS header has 1 generic header slot (at VPM offset 0) and then
         * one slot per output vertex after it. This means we don't need a
         * variable just to keep track of the number of vertices we emitted:
         * we can compute it here from the header offset variable by
         * subtracting the one generic header slot that always goes at the
         * beginning of our header.
         */
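        /* For example (values are illustrative, not from a real trace): if
         * the shader emitted 3 vertices and output_header_size is 4, the
         * word stored at VPM offset 0 below would be (3 << 16) | 4.
         */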
        nir_def *header_offset =
                nir_load_var(b, state->gs.header_offset_var);
        nir_def *vertex_count =
                nir_iadd_imm(b, header_offset, -1);
        nir_def *header =
                nir_ior_imm(b,
                            nir_ishl_imm(b, vertex_count,
                                         VERTEX_COUNT_OFFSET),
                            state->gs.output_header_size);

        v3d_nir_store_output(b, 0, NULL, header);
}

bool
v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
{
        struct v3d_nir_lower_io_state state = { 0 };

        /* Set up the layout of the VPM outputs. */
        switch (s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_nir_setup_vpm_layout_vs(c, &state);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_nir_setup_vpm_layout_gs(c, &state);
                break;
        case MESA_SHADER_FRAGMENT:
        case MESA_SHADER_COMPUTE:
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        nir_foreach_function_impl(impl, s) {
                nir_builder b = nir_builder_create(impl);

                if (c->s->info.stage == MESA_SHADER_GEOMETRY)
                        emit_gs_prolog(c, &b, impl, &state);

                nir_foreach_block(block, impl) {
                        nir_foreach_instr_safe(instr, block)
                                v3d_nir_lower_io_instr(c, &b, instr, &state);
                }

                nir_block *last = nir_impl_last_block(impl);
                b.cursor = nir_after_block(last);
                if (s->info.stage == MESA_SHADER_VERTEX) {
                        v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
                } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
                        emit_gs_vpm_output_header_prolog(c, &b, &state);
                }

                nir_metadata_preserve(impl,
                                      nir_metadata_block_index |
                                      nir_metadata_dominance);
        }

        if (s->info.stage == MESA_SHADER_VERTEX ||
            s->info.stage == MESA_SHADER_GEOMETRY) {
                v3d_nir_lower_io_update_output_var_base(c, &state);
        }

        /* It is really unlikely that we don't make progress here, and fully
         * tracking when we don't would make the code more complex, but we
         * are still interested in running this lowering through NIR_PASS.
         */
        return true;
}