• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23 
24 #include "compiler/v3d_compiler.h"
25 #include "compiler/nir/nir_builder.h"
26 
/**
 * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
 * intrinsics into something amenable to the V3D architecture.
 *
 * Most of the work is turning the VS's store_output intrinsics from working
 * on a base representing the gallium-level vec4 driver_location to an offset
 * within the VPM, and emitting the header that's read by the fixed function
 * hardware between the VS and FS.
 *
 * We also adjust the offsets on uniform loads to be in bytes, since that's
 * what we need for indirect addressing with general TMU access.
 */
39 
40 struct v3d_nir_lower_io_state {
41         int pos_vpm_offset;
42         int vp_vpm_offset;
43         int zs_vpm_offset;
44         int rcp_wc_vpm_offset;
45         int psiz_vpm_offset;
46         int varyings_vpm_offset;
47 
48         /* Geometry shader state */
49         struct {
50                 /* VPM offset for the current vertex data output */
51                 nir_variable *output_offset_var;
52                 /* VPM offset for the current vertex header */
53                 nir_variable *header_offset_var;
54                 /* VPM header for the current vertex */
55                 nir_variable *header_var;
56 
57                 /* Size of the complete VPM output header */
58                 uint32_t output_header_size;
59                 /* Size of the output data for a single vertex */
60                 uint32_t output_vertex_data_size;
61         } gs;
62 
63         BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];
64 
65         nir_def *pos[4];
66 };
67 
68 static void
69 v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
70                             struct v3d_nir_lower_io_state *state);
71 
72 static void
v3d_nir_store_output(nir_builder * b,int base,nir_def * offset,nir_def * chan)73 v3d_nir_store_output(nir_builder *b, int base, nir_def *offset,
74                      nir_def *chan)
75 {
76         if (offset) {
77                 /* When generating the VIR instruction, the base and the offset
78                  * are just going to get added together with an ADD instruction
79                  * so we might as well do the add here at the NIR level instead
80                  * and let the constant folding do its magic.
81                  */
82                 offset = nir_iadd_imm(b, offset, base);
83                 base = 0;
84         } else {
85                 offset = nir_imm_int(b, 0);
86         }
87 
88         nir_store_output(b, chan, offset, .base = base, .write_mask = 0x1, .component = 0);
89 }
90 
91 static int
v3d_varying_slot_vpm_offset(struct v3d_compile * c,unsigned location,unsigned component)92 v3d_varying_slot_vpm_offset(struct v3d_compile *c, unsigned location, unsigned component)
93 {
94         uint32_t num_used_outputs = 0;
95         struct v3d_varying_slot *used_outputs = NULL;
96         switch (c->s->info.stage) {
97         case MESA_SHADER_VERTEX:
98                 num_used_outputs = c->vs_key->num_used_outputs;
99                 used_outputs = c->vs_key->used_outputs;
100                 break;
101         case MESA_SHADER_GEOMETRY:
102                 num_used_outputs = c->gs_key->num_used_outputs;
103                 used_outputs = c->gs_key->used_outputs;
104                 break;
105         default:
106                 unreachable("Unsupported shader stage");
107         }
108 
109         for (int i = 0; i < num_used_outputs; i++) {
110                 struct v3d_varying_slot slot = used_outputs[i];
111 
112                 if (v3d_slot_get_slot(slot) == location &&
113                     v3d_slot_get_component(slot) == component) {
114                         return i;
115                 }
116         }
117 
118         return -1;
119 }
120 
121 /* Lowers a store_output(gallium driver location) to a series of store_outputs
122  * with a driver_location equal to the offset in the VPM.
123  *
124  * For geometry shaders we need to emit multiple vertices so the VPM offsets
125  * need to be computed in the shader code based on the current vertex index.
126  */
127 static void
v3d_nir_lower_vpm_output(struct v3d_compile * c,nir_builder * b,nir_intrinsic_instr * intr,struct v3d_nir_lower_io_state * state)128 v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
129                          nir_intrinsic_instr *intr,
130                          struct v3d_nir_lower_io_state *state)
131 {
132         b->cursor = nir_before_instr(&intr->instr);
133 
134         /* If this is a geometry shader we need to emit our outputs
135          * to the current vertex offset in the VPM.
136          */
137         nir_def *offset_reg =
138                 c->s->info.stage == MESA_SHADER_GEOMETRY ?
139                         nir_load_var(b, state->gs.output_offset_var) : NULL;
140 
141         int start_comp = nir_intrinsic_component(intr);
142         unsigned location = nir_intrinsic_io_semantics(intr).location;
143         nir_def *src = intr->src[0].ssa;
144         /* Save off the components of the position for the setup of VPM inputs
145          * read by fixed function HW.
146          */
147         if (location == VARYING_SLOT_POS) {
148                 for (int i = 0; i < intr->num_components; i++) {
149                         state->pos[start_comp + i] = nir_channel(b, src, i);
150                 }
151         }
152 
153         /* Just psiz to the position in the FF header right now. */
154         if (location == VARYING_SLOT_PSIZ &&
155             state->psiz_vpm_offset != -1) {
156                 v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
157         }
158 
159         if (location == VARYING_SLOT_LAYER) {
160                 assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
161                 nir_def *header = nir_load_var(b, state->gs.header_var);
162                 header = nir_iand_imm(b, header, 0xff00ffff);
163 
164                 /* From the GLES 3.2 spec:
165                  *
166                  *    "When fragments are written to a layered framebuffer, the
167                  *     fragment’s layer number selects an image from the array
168                  *     of images at each attachment (...). If the fragment’s
169                  *     layer number is negative, or greater than or equal to
170                  *     the minimum number of layers of any attachment, the
171                  *     effects of the fragment on the framebuffer contents are
172                  *     undefined."
173                  *
174                  * This suggests we can just ignore that situation, however,
175                  * for V3D an out-of-bounds layer index means that the binner
176                  * might do out-of-bounds writes access to the tile state. The
177                  * simulator has an assert to catch this, so we play safe here
178                  * and we make sure that doesn't happen by setting gl_Layer
179                  * to 0 in that case (we always allocate tile state for at
180                  * least one layer).
181                  */
182                 nir_def *fb_layers = nir_load_fb_layers_v3d(b, 32);
183                 nir_def *cond = nir_ige(b, src, fb_layers);
184                 nir_def *layer_id =
185                         nir_bcsel(b, cond,
186                                   nir_imm_int(b, 0),
187                                   nir_ishl_imm(b, src, 16));
188                 header = nir_ior(b, header, layer_id);
189                 nir_store_var(b, state->gs.header_var, header, 0x1);
190         }
191 
192         /* Scalarize outputs if it hasn't happened already, since we want to
193          * schedule each VPM write individually.  We can skip any output
194          * components not read by the FS.
195          */
196         for (int i = 0; i < intr->num_components; i++) {
197                 int vpm_offset =
198                         v3d_varying_slot_vpm_offset(c, location, start_comp + i);
199 
200                 if (!(nir_intrinsic_write_mask(intr) & (1 << i)))
201                         continue;
202 
203                 if (vpm_offset == -1)
204                         continue;
205 
206                 if (nir_src_is_const(intr->src[1]))
207                     vpm_offset += nir_src_as_uint(intr->src[1]) * 4;
208 
209                 BITSET_SET(state->varyings_stored, vpm_offset);
210 
211                 v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
212                                      offset_reg, nir_channel(b, src, i));
213         }
214 
215         nir_instr_remove(&intr->instr);
216 }
217 
218 static inline void
reset_gs_header(nir_builder * b,struct v3d_nir_lower_io_state * state)219 reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
220 {
221         const uint8_t NEW_PRIMITIVE_OFFSET = 0;
222         const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;
223 
224         uint32_t vertex_data_size = state->gs.output_vertex_data_size;
225         assert((vertex_data_size & 0xffffff00) == 0);
226 
227         uint32_t header;
228         header  = 1 << NEW_PRIMITIVE_OFFSET;
229         header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
230         nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
231 }
232 
233 static void
v3d_nir_lower_emit_vertex(struct v3d_compile * c,nir_builder * b,nir_intrinsic_instr * instr,struct v3d_nir_lower_io_state * state)234 v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
235                           nir_intrinsic_instr *instr,
236                           struct v3d_nir_lower_io_state *state)
237 {
238         b->cursor = nir_before_instr(&instr->instr);
239 
240         nir_def *header = nir_load_var(b, state->gs.header_var);
241         nir_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
242         nir_def *output_offset = nir_load_var(b, state->gs.output_offset_var);
243 
244         /* Emit fixed function outputs */
245         v3d_nir_emit_ff_vpm_outputs(c, b, state);
246 
247         /* Emit vertex header */
248         v3d_nir_store_output(b, 0, header_offset, header);
249 
250         /* Update VPM offset for next vertex output data and header */
251         output_offset =
252                 nir_iadd_imm(b, output_offset,
253                              state->gs.output_vertex_data_size);
254 
255         header_offset = nir_iadd_imm(b, header_offset, 1);
256 
257         /* Reset the New Primitive bit */
258         header = nir_iand_imm(b, header, 0xfffffffe);
259 
260         nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
261         nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
262         nir_store_var(b, state->gs.header_var, header, 0x1);
263 
264         nir_instr_remove(&instr->instr);
265 }
266 
267 static void
v3d_nir_lower_end_primitive(struct v3d_compile * c,nir_builder * b,nir_intrinsic_instr * instr,struct v3d_nir_lower_io_state * state)268 v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
269                             nir_intrinsic_instr *instr,
270                             struct v3d_nir_lower_io_state *state)
271 {
272         assert(state->gs.header_var);
273         b->cursor = nir_before_instr(&instr->instr);
274         reset_gs_header(b, state);
275 
276         nir_instr_remove(&instr->instr);
277 }
278 
279 /* Some vertex attribute formats may require to apply a swizzle but the hardware
280  * doesn't provide means to do that, so we need to apply the swizzle in the
281  * vertex shader.
282  *
283  * This is required at least in Vulkan to support mandatory vertex attribute
284  * format VK_FORMAT_B8G8R8A8_UNORM.
285  */
286 static void
v3d_nir_lower_vertex_input(struct v3d_compile * c,nir_builder * b,nir_intrinsic_instr * instr)287 v3d_nir_lower_vertex_input(struct v3d_compile *c, nir_builder *b,
288                            nir_intrinsic_instr *instr)
289 {
290         assert(c->s->info.stage == MESA_SHADER_VERTEX);
291 
292         if (!c->vs_key->va_swap_rb_mask)
293                 return;
294 
295         const uint32_t location = nir_intrinsic_io_semantics(instr).location;
296 
297         if (!(c->vs_key->va_swap_rb_mask & (1 << location)))
298                 return;
299 
300         assert(instr->num_components == 1);
301         const uint32_t comp = nir_intrinsic_component(instr);
302         if (comp == 0 || comp == 2)
303                 nir_intrinsic_set_component(instr, (comp + 2) % 4);
304 }
305 
306 static void
v3d_nir_lower_io_instr(struct v3d_compile * c,nir_builder * b,struct nir_instr * instr,struct v3d_nir_lower_io_state * state)307 v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
308                        struct nir_instr *instr,
309                        struct v3d_nir_lower_io_state *state)
310 {
311         if (instr->type != nir_instr_type_intrinsic)
312                 return;
313         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
314 
315         switch (intr->intrinsic) {
316         case nir_intrinsic_load_input:
317                 if (c->s->info.stage == MESA_SHADER_VERTEX)
318                         v3d_nir_lower_vertex_input(c, b, intr);
319                 break;
320 
321         case nir_intrinsic_store_output:
322                 if (c->s->info.stage == MESA_SHADER_VERTEX ||
323                     c->s->info.stage == MESA_SHADER_GEOMETRY) {
324                         v3d_nir_lower_vpm_output(c, b, intr, state);
325                 }
326                 break;
327 
328         case nir_intrinsic_emit_vertex:
329                 v3d_nir_lower_emit_vertex(c, b, intr, state);
330                 break;
331 
332         case nir_intrinsic_end_primitive:
333                 v3d_nir_lower_end_primitive(c, b, intr, state);
334                 break;
335 
336         default:
337                 break;
338         }
339 }
340 
341 /* Remap the output var's .driver_location.  This is purely for
342  * nir_print_shader() so that store_output can map back to a variable name.
343  */
344 static void
v3d_nir_lower_io_update_output_var_base(struct v3d_compile * c,struct v3d_nir_lower_io_state * state)345 v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
346                                         struct v3d_nir_lower_io_state *state)
347 {
348         nir_foreach_shader_out_variable_safe(var, c->s) {
349                 if (var->data.location == VARYING_SLOT_POS &&
350                     state->pos_vpm_offset != -1) {
351                         var->data.driver_location = state->pos_vpm_offset;
352                         continue;
353                 }
354 
355                 if (var->data.location == VARYING_SLOT_PSIZ &&
356                     state->psiz_vpm_offset != -1) {
357                         var->data.driver_location = state->psiz_vpm_offset;
358                         continue;
359                 }
360 
361                 int vpm_offset =
362                         v3d_varying_slot_vpm_offset(c,
363                                                     var->data.location,
364                                                     var->data.location_frac);
365                 if (vpm_offset != -1) {
366                         var->data.driver_location =
367                                 state->varyings_vpm_offset + vpm_offset;
368                 } else {
369                         /* If we couldn't find a mapping for the var, delete
370                          * it so that its old .driver_location doesn't confuse
371                          * nir_print_shader().
372                          */
373                         exec_node_remove(&var->node);
374                 }
375         }
376 }
377 
378 static void
v3d_nir_setup_vpm_layout_vs(struct v3d_compile * c,struct v3d_nir_lower_io_state * state)379 v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
380                             struct v3d_nir_lower_io_state *state)
381 {
382         uint32_t vpm_offset = 0;
383 
384         state->pos_vpm_offset = -1;
385         state->vp_vpm_offset = -1;
386         state->zs_vpm_offset = -1;
387         state->rcp_wc_vpm_offset = -1;
388         state->psiz_vpm_offset = -1;
389 
390         bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
391         if (needs_ff_outputs) {
392                 if (c->vs_key->is_coord) {
393                         state->pos_vpm_offset = vpm_offset;
394                         vpm_offset += 4;
395                 }
396 
397                 state->vp_vpm_offset = vpm_offset;
398                 vpm_offset += 2;
399 
400                 if (!c->vs_key->is_coord) {
401                         state->zs_vpm_offset = vpm_offset++;
402                         state->rcp_wc_vpm_offset = vpm_offset++;
403                 }
404 
405                 if (c->vs_key->per_vertex_point_size)
406                         state->psiz_vpm_offset = vpm_offset++;
407         }
408 
409         state->varyings_vpm_offset = vpm_offset;
410 
411         c->vpm_output_size = MAX2(1, vpm_offset + c->vs_key->num_used_outputs);
412 }
413 
414 static void
v3d_nir_setup_vpm_layout_gs(struct v3d_compile * c,struct v3d_nir_lower_io_state * state)415 v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
416                             struct v3d_nir_lower_io_state *state)
417 {
418         /* 1 header slot for number of output vertices */
419         uint32_t vpm_offset = 1;
420 
421         /* 1 header slot per output vertex */
422         const uint32_t num_vertices = c->s->info.gs.vertices_out;
423         vpm_offset += num_vertices;
424 
425         state->gs.output_header_size = vpm_offset;
426 
427         /* Vertex data: here we only compute offsets into a generic vertex data
428          * elements. When it is time to actually write a particular vertex to
429          * the VPM, we will add the offset for that vertex into the VPM output
430          * to these offsets.
431          *
432          * If geometry shaders are present, they are always the last shader
433          * stage before rasterization, so we always emit fixed function outputs.
434          */
435         vpm_offset = 0;
436         if (c->gs_key->is_coord) {
437                 state->pos_vpm_offset = vpm_offset;
438                 vpm_offset += 4;
439         } else {
440                 state->pos_vpm_offset = -1;
441         }
442 
443         state->vp_vpm_offset = vpm_offset;
444         vpm_offset += 2;
445 
446         if (!c->gs_key->is_coord) {
447                 state->zs_vpm_offset = vpm_offset++;
448                 state->rcp_wc_vpm_offset = vpm_offset++;
449         } else {
450                 state->zs_vpm_offset = -1;
451                 state->rcp_wc_vpm_offset = -1;
452         }
453 
454         /* Mesa enables OES_geometry_shader_point_size automatically with
455          * OES_geometry_shader so we always need to handle point size
456          * writes if present.
457          */
458         if (c->gs_key->per_vertex_point_size)
459                 state->psiz_vpm_offset = vpm_offset++;
460 
461         state->varyings_vpm_offset = vpm_offset;
462 
463         state->gs.output_vertex_data_size =
464                 state->varyings_vpm_offset + c->gs_key->num_used_outputs;
465 
466         c->vpm_output_size =
467                 state->gs.output_header_size +
468                 state->gs.output_vertex_data_size * num_vertices;
469 }
470 
471 static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile * c,nir_builder * b,struct v3d_nir_lower_io_state * state)472 v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
473                             struct v3d_nir_lower_io_state *state)
474 {
475         /* If this is a geometry shader we need to emit our fixed function
476          * outputs to the current vertex offset in the VPM.
477          */
478         nir_def *offset_reg =
479                 c->s->info.stage == MESA_SHADER_GEOMETRY ?
480                         nir_load_var(b, state->gs.output_offset_var) : NULL;
481 
482         for (int i = 0; i < 4; i++) {
483                 if (!state->pos[i])
484                         state->pos[i] = nir_undef(b, 1, 32);
485         }
486 
487         nir_def *rcp_wc = nir_frcp(b, state->pos[3]);
488 
489         if (state->pos_vpm_offset != -1) {
490                 for (int i = 0; i < 4; i++) {
491                         v3d_nir_store_output(b, state->pos_vpm_offset + i,
492                                              offset_reg, state->pos[i]);
493                 }
494         }
495 
496         if (state->vp_vpm_offset != -1) {
497                 for (int i = 0; i < 2; i++) {
498                         nir_def *pos;
499                         nir_def *scale;
500                         pos = state->pos[i];
501                         if (i == 0)
502                                 scale = nir_load_viewport_x_scale(b);
503                         else
504                                 scale = nir_load_viewport_y_scale(b);
505                         pos = nir_fmul(b, pos, scale);
506                         pos = nir_fmul(b, pos, rcp_wc);
507                         /* Pre-V3D 4.3 hardware has a quirk where it expects XY
508                          * coordinates in .8 fixed-point format, but then it
509                          * will internally round it to .6 fixed-point,
510                          * introducing a double rounding. The double rounding
511                          * can cause very slight differences in triangle
512                          * raterization coverage that can actually be noticed by
513                          * some CTS tests.
514                          *
515                          * The correct fix for this as recommended by Broadcom
516                          * is to convert to .8 fixed-point with ffloor().
517                          */
518                         if (c->devinfo->ver == 42)
519                                  pos = nir_f2i32(b, nir_ffloor(b, pos));
520                         else
521                                  pos = nir_f2i32(b, nir_fround_even(b, pos));
522 
523                        v3d_nir_store_output(b, state->vp_vpm_offset + i,
524                                             offset_reg, pos);
525                 }
526         }
527 
528         if (state->zs_vpm_offset != -1) {
529                 nir_def *z = state->pos[2];
530                 z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
531                 z = nir_fmul(b, z, rcp_wc);
532                 z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
533                 v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
534         }
535 
536         if (state->rcp_wc_vpm_offset != -1) {
537                 v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
538                                      offset_reg, rcp_wc);
539         }
540 
541         /* Store 0 to varyings requested by the FS but not stored by the
542          * previous stage. This should be undefined behavior, but
543          * glsl-routing seems to rely on it.
544          */
545         uint32_t num_used_outputs;
546         switch (c->s->info.stage) {
547         case MESA_SHADER_VERTEX:
548                 num_used_outputs = c->vs_key->num_used_outputs;
549                 break;
550         case MESA_SHADER_GEOMETRY:
551                 num_used_outputs = c->gs_key->num_used_outputs;
552                 break;
553         default:
554                 unreachable("Unsupported shader stage");
555         }
556 
557         for (int i = 0; i < num_used_outputs; i++) {
558                 if (!BITSET_TEST(state->varyings_stored, i)) {
559                         v3d_nir_store_output(b, state->varyings_vpm_offset + i,
560                                              offset_reg, nir_imm_int(b, 0));
561                 }
562         }
563 }
564 
565 static void
emit_gs_prolog(struct v3d_compile * c,nir_builder * b,nir_function_impl * impl,struct v3d_nir_lower_io_state * state)566 emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
567                nir_function_impl *impl,
568                struct v3d_nir_lower_io_state *state)
569 {
570         nir_block *first = nir_start_block(impl);
571         b->cursor = nir_before_block(first);
572 
573         const struct glsl_type *uint_type = glsl_uint_type();
574 
575         assert(!state->gs.output_offset_var);
576         state->gs.output_offset_var =
577                 nir_local_variable_create(impl, uint_type, "output_offset");
578         nir_store_var(b, state->gs.output_offset_var,
579                       nir_imm_int(b, state->gs.output_header_size), 0x1);
580 
581         assert(!state->gs.header_offset_var);
582         state->gs.header_offset_var =
583                 nir_local_variable_create(impl, uint_type, "header_offset");
584         nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);
585 
586         assert(!state->gs.header_var);
587         state->gs.header_var =
588                 nir_local_variable_create(impl, uint_type, "header");
589         reset_gs_header(b, state);
590 }
591 
592 static void
emit_gs_vpm_output_header_prolog(struct v3d_compile * c,nir_builder * b,struct v3d_nir_lower_io_state * state)593 emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
594                                  struct v3d_nir_lower_io_state *state)
595 {
596         const uint8_t VERTEX_COUNT_OFFSET = 16;
597 
598         /* Our GS header has 1 generic header slot (at VPM offset 0) and then
599          * one slot per output vertex after it. This means we don't need to
600          * have a variable just to keep track of the number of vertices we
601          * emitted and instead we can just compute it here from the header
602          * offset variable by removing the one generic header slot that always
603          * goes at the beginning of out header.
604          */
605         nir_def *header_offset =
606                 nir_load_var(b, state->gs.header_offset_var);
607         nir_def *vertex_count =
608                 nir_iadd_imm(b, header_offset, -1);
609         nir_def *header =
610                 nir_ior_imm(b,
611                             nir_ishl_imm(b, vertex_count,
612                                          VERTEX_COUNT_OFFSET),
613                             state->gs.output_header_size);
614 
615         v3d_nir_store_output(b, 0, NULL, header);
616 }
617 
618 bool
v3d_nir_lower_io(nir_shader * s,struct v3d_compile * c)619 v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
620 {
621         struct v3d_nir_lower_io_state state = { 0 };
622 
623         /* Set up the layout of the VPM outputs. */
624         switch (s->info.stage) {
625         case MESA_SHADER_VERTEX:
626                 v3d_nir_setup_vpm_layout_vs(c, &state);
627                 break;
628         case MESA_SHADER_GEOMETRY:
629                 v3d_nir_setup_vpm_layout_gs(c, &state);
630                 break;
631         case MESA_SHADER_FRAGMENT:
632         case MESA_SHADER_COMPUTE:
633                 break;
634         default:
635                 unreachable("Unsupported shader stage");
636         }
637 
638         nir_foreach_function_impl(impl, s) {
639                 nir_builder b = nir_builder_create(impl);
640 
641                 if (c->s->info.stage == MESA_SHADER_GEOMETRY)
642                         emit_gs_prolog(c, &b, impl, &state);
643 
644                 nir_foreach_block(block, impl) {
645                         nir_foreach_instr_safe(instr, block)
646                                 v3d_nir_lower_io_instr(c, &b, instr,
647                                                        &state);
648                 }
649 
650                 nir_block *last = nir_impl_last_block(impl);
651                 b.cursor = nir_after_block(last);
652                 if (s->info.stage == MESA_SHADER_VERTEX) {
653                         v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
654                 } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
655                         emit_gs_vpm_output_header_prolog(c, &b, &state);
656                 }
657 
658                 nir_metadata_preserve(impl,
659                                       nir_metadata_block_index |
660                                       nir_metadata_dominance);
661         }
662 
663         if (s->info.stage == MESA_SHADER_VERTEX ||
664             s->info.stage == MESA_SHADER_GEOMETRY) {
665                 v3d_nir_lower_io_update_output_var_base(c, &state);
666         }
667 
668         /* It is really unlikely that we don't get progress here, and fully
669          * filtering when not would make code more complex, but we are still
670          * interested on getting this lowering going through NIR_PASS
671          */
672         return true;
673 }
674