/*
 * Copyright © 2011 Intel Corporation
 * Copyright © 2022 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * This lowering pass generates GLSL code that manually packs varyings into
 * vec4 slots, for the benefit of back-ends that don't support packed varyings
 * natively.
 *
 * For example, the following shader:
 *
 *   out mat3x2 foo;  // location=4, location_frac=0
 *   out vec3 bar[2]; // location=5, location_frac=2
 *
 *   main()
 *   {
 *     ...
 *   }
 *
 * Is rewritten to:
 *
 *   mat3x2 foo;
 *   vec3 bar[2];
 *   out vec4 packed4; // location=4, location_frac=0
 *   out vec4 packed5; // location=5, location_frac=0
 *   out vec4 packed6; // location=6, location_frac=0
 *
 *   main()
 *   {
 *     ...
 *     packed4.xy = foo[0];
 *     packed4.zw = foo[1];
 *     packed5.xy = foo[2];
 *     packed5.zw = bar[0].xy;
 *     packed6.x = bar[0].z;
 *     packed6.yzw = bar[1];
 *   }
 *
 * This lowering pass properly handles "double parking" of a varying vector
 * across two varying slots.  For example, in the code above, two of the
 * components of bar[0] are stored in packed5, and the remaining component is
 * stored in packed6.
 *
 * Note that in theory, the extra instructions may cause some loss of
 * performance.  However, hopefully in most cases the performance loss will
 * either be absorbed by a later optimization pass, or it will be offset by
 * memory bandwidth savings (because fewer varyings are used).
 *
 * This lowering pass also packs flat floats, ints, and uints together, by
 * using ivec4 as the base type of flat "varyings", and using appropriate
 * casts to convert floats and uints into ints.
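 *
 * Conceptually (in GLSL terms; the names below are illustrative, and the
 * pass actually operates on NIR), flat varyings such as:
 *
 *   flat out float f;
 *   flat out uint u;
 *
 * are stored via bit-preserving casts into an ivec4 slot:
 *
 *   flat out ivec4 packedN;
 *   ...
 *   packedN.x = floatBitsToInt(f);
 *   packedN.y = int(u);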
 *
 * This lowering pass also handles varyings whose type is a struct or an
 * array of structs.  Structs are packed in order and with no gaps, so there
 * may be a performance penalty due to structure elements being double-parked.
 *
 * Lowering of geometry shader inputs is slightly more complex, since geometry
 * inputs are always arrays, so we need to lower arrays to arrays.  For
 * example, the following input:
 *
 *   in struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];         // location=4, location_frac=0
 *
 * Would get lowered like this if it occurred in a fragment shader:
 *
 *   struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];
 *   in vec4 packed4;  // location=4, location_frac=0
 *   in vec4 packed5;  // location=5, location_frac=0
 *   in vec4 packed6;  // location=6, location_frac=0
 *   in vec4 packed7;  // location=7, location_frac=0
 *   in vec4 packed8;  // location=8, location_frac=0
 *   in vec4 packed9;  // location=9, location_frac=0
 *
 *   main()
 *   {
 *     arr[0].f = packed4.x;
 *     arr[0].v = packed4.yzw;
 *     arr[0].a[0] = packed5.xy;
 *     arr[0].a[1] = packed5.zw;
 *     arr[1].f = packed6.x;
 *     arr[1].v = packed6.yzw;
 *     arr[1].a[0] = packed7.xy;
 *     arr[1].a[1] = packed7.zw;
 *     arr[2].f = packed8.x;
 *     arr[2].v = packed8.yzw;
 *     arr[2].a[0] = packed9.xy;
 *     arr[2].a[1] = packed9.zw;
 *     ...
 *   }
 *
 * But it would get lowered like this if it occurred in a geometry shader:
 *
 *   struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];
 *   in vec4 packed4[3];  // location=4, location_frac=0
 *   in vec4 packed5[3];  // location=5, location_frac=0
 *
 *   main()
 *   {
 *     arr[0].f = packed4[0].x;
 *     arr[0].v = packed4[0].yzw;
 *     arr[0].a[0] = packed5[0].xy;
 *     arr[0].a[1] = packed5[0].zw;
 *     arr[1].f = packed4[1].x;
 *     arr[1].v = packed4[1].yzw;
 *     arr[1].a[0] = packed5[1].xy;
 *     arr[1].a[1] = packed5[1].zw;
 *     arr[2].f = packed4[2].x;
 *     arr[2].v = packed4[2].yzw;
 *     arr[2].a[0] = packed5[2].xy;
 *     arr[2].a[1] = packed5[2].zw;
 *     ...
 *   }
 */

#include "nir.h"
#include "nir_builder.h"
#include "gl_nir.h"
#include "gl_nir_linker.h"
#include "program/prog_instruction.h"
#include "main/mtypes.h"

/**
 * Visitor that performs varying packing.  For each varying declared in the
 * shader, this visitor determines whether it needs to be packed.  If so, it
 * demotes it to an ordinary global, creates new packed varyings, and
 * generates assignments to convert between the original varying and the
 * packed varying.
 */
struct lower_packed_varyings_state
{
   const struct gl_constants *consts;

   struct gl_shader_program *prog;

   /**
    * Memory context used to allocate new instructions for the shader.
    */
   void *mem_ctx;

   /**
    * Number of generic varying slots which are used by this shader.  This is
    * used to allocate temporary intermediate data structures.  If any varying
    * used by this shader has a location greater than or equal to
    * VARYING_SLOT_VAR0 + locations_used, an assertion will fire.
    */
   unsigned locations_used;

   const uint8_t* components;

   /**
    * Array of pointers to the packed varyings that have been created for each
    * generic varying slot.  NULL entries in this array indicate varying slots
    * for which a packed varying has not been created yet.
    */
   nir_variable **packed_varyings;

   nir_shader *shader;

   nir_function_impl *impl;

   nir_builder b;

   /**
    * Type of varying which is being lowered in this pass (either
    * nir_var_shader_in or nir_var_shader_out).
    */
   nir_variable_mode mode;

   /**
    * If we are currently lowering geometry shader inputs, the number of input
    * vertices the geometry shader accepts.  Otherwise zero.
    */
   unsigned gs_input_vertices;

   bool disable_varying_packing;
   bool disable_xfb_packing;
   bool xfb_enabled;
   bool ifc_exposed_to_query_api;
};

static bool
needs_lowering(struct lower_packed_varyings_state *state, nir_variable *var)
{
   /* Things composed of vec4's, varyings with explicitly assigned
    * locations or varyings marked as must_be_shader_input (which might be used
    * by interpolateAt* functions) shouldn't be lowered. Everything else can be.
    */
   if (var->data.explicit_location || var->data.must_be_shader_input)
      return false;

   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, state->shader->info.stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* Some drivers (e.g. panfrost) don't support packing of transform
    * feedback varyings.
    */
   if (state->disable_xfb_packing && var->data.is_xfb &&
       !(glsl_type_is_array(type) || glsl_type_is_struct(type) || glsl_type_is_matrix(type)) &&
       state->xfb_enabled)
      return false;

   /* Override disable_varying_packing if the var is only used by transform
    * feedback. Also override it if transform feedback is enabled and the
    * variable is an array, struct or matrix as the elements of these types
    * will always have the same interpolation and therefore are safe to pack.
    */
   if (state->disable_varying_packing && !var->data.is_xfb_only &&
       !((glsl_type_is_array(type) || glsl_type_is_struct(type) || glsl_type_is_matrix(type)) &&
         state->xfb_enabled))
      return false;

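   /* A non-64-bit vec4 already fills its slot exactly, so there is nothing
    * to be gained by packing it.
    */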
   type = glsl_without_array(type);
   if (glsl_get_vector_elements(type) == 4 && !glsl_type_is_64bit(type))
      return false;
   return true;
}

/**
 * If no packed varying has been created for the given varying location yet,
 * create it and add it to the shader.
 *
 * The newly created varying inherits its interpolation parameters from \c
 * unpacked_var.  Its base type is ivec4 if we are lowering a flat varying,
 * vec4 otherwise.
 */
static void
create_or_update_packed_varying(struct lower_packed_varyings_state *state,
                                nir_variable *unpacked_var,
                                const char *name, unsigned location,
                                unsigned slot, unsigned vertex_index)
{
   assert(slot < state->locations_used);
   if (state->packed_varyings[slot] == NULL) {
      assert(state->components[slot] != 0);
      assert(name);

      nir_variable *packed_var = rzalloc(state->shader, nir_variable);
      packed_var->name = ralloc_asprintf(packed_var, "packed:%s", name);
      packed_var->data.mode = state->mode;

      bool is_interpolation_flat =
         unpacked_var->data.interpolation == INTERP_MODE_FLAT ||
         glsl_contains_integer(unpacked_var->type) ||
         glsl_contains_double(unpacked_var->type);

      const struct glsl_type *packed_type;
      if (is_interpolation_flat)
         packed_type = glsl_vector_type(GLSL_TYPE_INT, state->components[slot]);
      else
         packed_type = glsl_vector_type(GLSL_TYPE_FLOAT, state->components[slot]);

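      /* Geometry shader inputs are per-vertex, so the packed varying itself
       * becomes an array indexed by vertex.
       */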
      if (state->gs_input_vertices != 0) {
         packed_type =
            glsl_array_type(packed_type, state->gs_input_vertices, 0);
      }

      packed_var->type = packed_type;
      packed_var->data.centroid = unpacked_var->data.centroid;
      packed_var->data.sample = unpacked_var->data.sample;
      packed_var->data.patch = unpacked_var->data.patch;
      packed_var->data.interpolation = is_interpolation_flat ?
         (unsigned) INTERP_MODE_FLAT : unpacked_var->data.interpolation;
      packed_var->data.location = location;
      packed_var->data.precision = unpacked_var->data.precision;
      packed_var->data.always_active_io = unpacked_var->data.always_active_io;
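      /* Mark the stream as packed; the real per-component stream bits are
       * ORed in later by lower_varying().
       */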
      packed_var->data.stream = NIR_STREAM_PACKED;

      nir_shader_add_variable(state->shader, packed_var);
      state->packed_varyings[slot] = packed_var;
   } else {
      nir_variable *var = state->packed_varyings[slot];

      /* The slot needs to be marked as always active if any variable that got
       * packed there was.
       */
      var->data.always_active_io |= unpacked_var->data.always_active_io;

      /* For geometry shader inputs, only update the packed variable name the
       * first time we visit each component.
       */
      if (state->gs_input_vertices == 0 || vertex_index == 0) {
         assert(name);
         ralloc_asprintf_append((char **) &var->name, ",%s", name);
      }
   }
}

/**
 * Retrieve the packed varying corresponding to the given varying location.
 *
 * \param vertex_index: if we are lowering geometry shader inputs, then this
 * indicates which vertex we are currently lowering.  Otherwise it is ignored.
 */
static nir_deref_instr *
get_packed_varying_deref(struct lower_packed_varyings_state *state,
                         unsigned location, nir_variable *unpacked_var,
                         const char *name, unsigned vertex_index)
{
   unsigned slot = location - VARYING_SLOT_VAR0;
   assert(slot < state->locations_used);

   create_or_update_packed_varying(state, unpacked_var, name, location, slot,
                                   vertex_index);

   nir_deref_instr *deref =
      nir_build_deref_var(&state->b, state->packed_varyings[slot]);

   if (state->gs_input_vertices != 0) {
      /* When lowering GS inputs, the packed variable is an array, so we need
       * to dereference it using vertex_index.
       */
      nir_load_const_instr *c_idx =
         nir_load_const_instr_create(state->b.shader, 1, 32);
      c_idx->value[0].u32 = vertex_index;
      nir_builder_instr_insert(&state->b, &c_idx->instr);

      deref = nir_build_deref_array(&state->b, deref, &c_idx->def);
   }

   return deref;
}

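/* Convert an int value to uint.  The conversion is same-width, so this is
 * effectively a bit-for-bit reinterpretation.
 */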
static nir_ssa_def *
i2u(struct lower_packed_varyings_state *state, nir_ssa_def *value)
{
   value =
      nir_build_alu(&state->b,
                    nir_type_conversion_op(nir_type_int, nir_type_uint,
                                           nir_rounding_mode_undef),
                    value, NULL, NULL, NULL);
   return value;
}

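/* Convert a uint value to int; the bit-for-bit counterpart of i2u(). */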
static nir_ssa_def *
u2i(struct lower_packed_varyings_state *state, nir_ssa_def *value)
{
   value =
      nir_build_alu(&state->b,
                    nir_type_conversion_op(nir_type_uint, nir_type_int,
                                           nir_rounding_mode_undef),
                    value, NULL, NULL, NULL);
   return value;
}

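/* Deferred store(s) produced while (un)packing a single vector.  64-bit
 * values may need two stores with separate writemasks, since e.g. a dvec2
 * occupies all four 32-bit components of a packed vec4.
 */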
struct packing_store_values {
   bool is_64bit;
   unsigned writemasks[2];
   nir_ssa_def *values[2];
   nir_deref_instr *deref;
};

/**
 * Build the deferred store values needed to copy \c value into
 * \c packed_deref, performing appropriate bitcasts if necessary to match up
 * types.
 *
 * This function is called when packing varyings.
 */
static struct packing_store_values *
bitwise_assign_pack(struct lower_packed_varyings_state *state,
                    nir_deref_instr *packed_deref,
                    nir_deref_instr *unpacked_deref,
                    const struct glsl_type *unpacked_type,
                    nir_ssa_def *value,
                    unsigned writemask)
{
   nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);

   enum glsl_base_type packed_base_type = glsl_get_base_type(packed_var->type);
   enum glsl_base_type unpacked_base_type = glsl_get_base_type(unpacked_type);

   struct packing_store_values *store_state =
      calloc(1, sizeof(struct packing_store_values));

   if (unpacked_base_type != packed_base_type) {
      /* Since we only mix types in flat varyings, and we always store flat
       * varyings as type ivec4, we need only produce conversions from (uint
       * or float) to int.
       */
      assert(packed_base_type == GLSL_TYPE_INT);
      switch (unpacked_base_type) {
      case GLSL_TYPE_UINT:
         value = u2i(state, value);
         break;
      case GLSL_TYPE_FLOAT:
         value = nir_mov(&state->b, value);
         break;
      case GLSL_TYPE_DOUBLE:
      case GLSL_TYPE_UINT64:
      case GLSL_TYPE_INT64:
         assert(glsl_get_vector_elements(unpacked_type) <= 2);
         if (glsl_get_vector_elements(unpacked_type) == 2) {
            assert(glsl_get_vector_elements(packed_var->type) == 4);

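            /* A 64-bit 2-component vector fills the whole packed vec4:
             * unpack each component into two 32-bit words and emit one
             * store for .xy and another for .zw.
             */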
            unsigned swiz_x = 0;
            unsigned writemask = 0x3;
            nir_ssa_def *swizzle = nir_swizzle(&state->b, value, &swiz_x, 1);
            nir_ssa_def *x_value = nir_unpack_64_2x32(&state->b, swizzle);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               x_value = u2i(state, x_value);

            store_state->is_64bit = true;
            store_state->deref = packed_deref;
            store_state->values[0] = x_value;
            store_state->writemasks[0] = writemask;

            unsigned swiz_y = 1;
            writemask = 0xc;
            swizzle = nir_swizzle(&state->b, value, &swiz_y, 1);
            nir_ssa_def *y_value = nir_unpack_64_2x32(&state->b, swizzle);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               y_value = u2i(state, y_value);

            store_state->deref = packed_deref;
            store_state->values[1] = y_value;
            store_state->writemasks[1] = writemask;
            return store_state;
         } else {
            value = nir_unpack_64_2x32(&state->b, value);

            if (unpacked_base_type != GLSL_TYPE_INT64)
               value = u2i(state, value);
         }
         break;
      case GLSL_TYPE_SAMPLER:
      case GLSL_TYPE_IMAGE:
         value = u2i(state, nir_unpack_64_2x32(&state->b, value));
         break;
      default:
         assert(!"Unexpected type conversion while lowering varyings");
         break;
      }
   }

   store_state->deref = packed_deref;
   store_state->values[0] = value;
   store_state->writemasks[0] = writemask;

   return store_state;
}

/**
 * This function is called when unpacking varyings.
 */
static struct packing_store_values *
bitwise_assign_unpack(struct lower_packed_varyings_state *state,
                      nir_deref_instr *unpacked_deref,
                      nir_deref_instr *packed_deref,
                      const struct glsl_type *unpacked_type,
                      nir_ssa_def *value, unsigned writemask)
{
   nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);

   const struct glsl_type *packed_type = glsl_without_array(packed_var->type);
   enum glsl_base_type packed_base_type = glsl_get_base_type(packed_type);
   enum glsl_base_type unpacked_base_type = glsl_get_base_type(unpacked_type);

   struct packing_store_values *store_state =
      calloc(1, sizeof(struct packing_store_values));

   if (unpacked_base_type != packed_base_type) {
      /* Since we only mix types in flat varyings, and we always store flat
       * varyings as type ivec4, we need only produce conversions from int to
       * (uint or float).
       */
      assert(packed_base_type == GLSL_TYPE_INT);

      switch (unpacked_base_type) {
      case GLSL_TYPE_UINT:
         value = i2u(state, value);
         break;
      case GLSL_TYPE_FLOAT:
         value = nir_mov(&state->b, value);
         break;
      case GLSL_TYPE_DOUBLE:
      case GLSL_TYPE_UINT64:
      case GLSL_TYPE_INT64:
         assert(glsl_get_vector_elements(unpacked_type) <= 2);
         if (glsl_get_vector_elements(unpacked_type) == 2) {
            assert(glsl_get_vector_elements(packed_type) == 4);

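            /* Mirror of the packing case: reassemble each 64-bit component
             * from two 32-bit words of the packed vec4 and emit two stores,
             * one per unpacked component.
             */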
            unsigned swiz_xy[2] = {0, 1};
            writemask = 1 << (ffs(writemask) - 1);
            nir_ssa_def *xy_value = nir_swizzle(&state->b, value, swiz_xy, 2);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               xy_value = i2u(state, xy_value);

            xy_value = nir_pack_64_2x32(&state->b, xy_value);
            store_state->is_64bit = true;
            store_state->deref = unpacked_deref;
            store_state->values[0] = xy_value;
            store_state->writemasks[0] = writemask;

            unsigned swiz_zw[2] = {2, 3};
            writemask = writemask << 1;
            nir_ssa_def *zw_value = nir_swizzle(&state->b, value, swiz_zw, 2);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               zw_value = i2u(state, zw_value);

            zw_value = nir_pack_64_2x32(&state->b, zw_value);
            store_state->deref = unpacked_deref;
            store_state->values[1] = zw_value;
            store_state->writemasks[1] = writemask;

            return store_state;
         } else {
            if (unpacked_base_type != GLSL_TYPE_INT64)
               value = i2u(state, value);

            value = nir_pack_64_2x32(&state->b, value);
         }
         break;
      case GLSL_TYPE_SAMPLER:
      case GLSL_TYPE_IMAGE:
         value = nir_pack_64_2x32(&state->b, i2u(state, value));
         break;
      default:
         assert(!"Unexpected type conversion while lowering varyings");
         break;
      }
   }

   store_state->deref = unpacked_deref;
   store_state->values[0] = value;
   store_state->writemasks[0] = writemask;

   return store_state;
}

static void
create_store_deref(struct lower_packed_varyings_state *state,
                   nir_deref_instr *deref, nir_ssa_def *value,
                   unsigned writemask, bool is_64bit)
{
   /* If dest and value have a different number of components, pack the srcs
    * into a vector.
    */
   const struct glsl_type *type = glsl_without_array(deref->type);
   unsigned comps = glsl_get_vector_elements(type);
   if (value->num_components != comps) {
      nir_ssa_def *srcs[4];

      unsigned comp = 0;
      for (unsigned i = 0; i < comps; i++) {
         if (writemask & (1 << i)) {
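            /* A split 64-bit value has already been packed down to a single
             * component, so use it directly instead of swizzling out a
             * 32-bit lane.
             */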
            if (is_64bit && state->mode == nir_var_shader_in)
               srcs[i] = value;
            else
               srcs[i] = nir_swizzle(&state->b, value, &comp, 1);
            comp++;
         } else {
            srcs[i] = nir_ssa_undef(&state->b, 1,
                                    glsl_type_is_64bit(type) ? 64 : 32);
         }
      }
      value = nir_vec(&state->b, srcs, comps);
   }

   nir_store_deref(&state->b, deref, value, writemask);
}

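/* Forward declaration: lower_varying() and lower_arraylike() below are
 * mutually recursive.
 */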
static unsigned
lower_varying(struct lower_packed_varyings_state *state,
              nir_ssa_def *rhs_swizzle, unsigned writemask,
              const struct glsl_type *type, unsigned fine_location,
              nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
              const char *name, bool gs_input_toplevel, unsigned vertex_index);

/**
 * Recursively pack or unpack a varying for which we need to iterate over its
 * constituent elements.
 * This takes care of both arrays and matrices.
 *
 * \param gs_input_toplevel should be set to true if we are lowering geometry
 * shader inputs, and we are currently lowering the whole input variable
 * (i.e. we are lowering the array whose index selects the vertex).
 *
 * \param vertex_index: if we are lowering geometry shader inputs, and the
 * level of the array that we are currently lowering is *not* the top level,
 * then this indicates which vertex we are currently lowering.  Otherwise it
 * is ignored.
 */
static unsigned
lower_arraylike(struct lower_packed_varyings_state *state,
                nir_ssa_def *rhs_swizzle, unsigned writemask,
                const struct glsl_type *type, unsigned fine_location,
                nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
                const char *name, bool gs_input_toplevel, unsigned vertex_index)
{
   unsigned array_size = glsl_get_length(type);
   unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
   if (array_size * dmul + fine_location % 4 > 4) {
      fine_location = ALIGN_POT(fine_location, dmul);
   }

   type = glsl_get_array_element(type);
   for (unsigned i = 0; i < array_size; i++) {
      nir_load_const_instr *c_idx =
         nir_load_const_instr_create(state->b.shader, 1, 32);
      c_idx->value[0].u32 = i;
      nir_builder_instr_insert(&state->b, &c_idx->instr);

      nir_deref_instr *unpacked_array_deref =
         nir_build_deref_array(&state->b, unpacked_var_deref, &c_idx->def);

      if (gs_input_toplevel) {
         /* Geometry shader inputs are a special case.  Instead of storing
          * each element of the array at a different location, all elements
          * are at the same location, but with a different vertex index.
          */
         (void) lower_varying(state, rhs_swizzle, writemask, type, fine_location,
                              unpacked_var, unpacked_array_deref, name, false, i);
      } else {
         char *subscripted_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s[%d]", name, i) : NULL;
         fine_location =
            lower_varying(state, rhs_swizzle, writemask, type, fine_location,
                          unpacked_var, unpacked_array_deref,
                          subscripted_name, false, vertex_index);
      }
   }

   return fine_location;
}

/**
 * Recursively pack or unpack the given varying (or portion of a varying) by
 * traversing all of its constituent vectors.
 *
 * \param fine_location is the location where the first constituent vector
 * should be packed--the word "fine" indicates that this location is expressed
 * in multiples of a float, rather than multiples of a vec4 as is used
 * elsewhere in Mesa.
 *
 * \param gs_input_toplevel should be set to true if we are lowering geometry
 * shader inputs, and we are currently lowering the whole input variable
 * (i.e. we are lowering the array whose index selects the vertex).
 *
 * \param vertex_index: if we are lowering geometry shader inputs, and the
 * level of the array that we are currently lowering is *not* the top level,
 * then this indicates which vertex we are currently lowering.  Otherwise it
 * is ignored.
 *
 * \return the location where the next constituent vector (after this one)
 * should be packed.
 */
static unsigned
lower_varying(struct lower_packed_varyings_state *state,
              nir_ssa_def *rhs_swizzle, unsigned writemask,
              const struct glsl_type *type, unsigned fine_location,
              nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
              const char *name, bool gs_input_toplevel, unsigned vertex_index)
{
   unsigned dmul = glsl_type_is_64bit(type) ? 2 : 1;
   /* When gs_input_toplevel is set, we should be looking at a geometry shader
    * input array.
    */
   assert(!gs_input_toplevel || glsl_type_is_array(type));

   if (glsl_type_is_struct(type)) {
      unsigned struct_len = glsl_get_length(type);
      for (unsigned i = 0; i < struct_len; i++) {
         const char *field_name = glsl_get_struct_elem_name(type, i);
         char *deref_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s.%s", name, field_name) :
            NULL;
         const struct glsl_type *field_type = glsl_get_struct_field(type, i);

         nir_deref_instr *unpacked_struct_deref =
            nir_build_deref_struct(&state->b, unpacked_var_deref, i);
         fine_location = lower_varying(state, rhs_swizzle, writemask, field_type,
                                       fine_location, unpacked_var,
                                       unpacked_struct_deref, deref_name,
                                       false, vertex_index);
      }

      return fine_location;
   } else if (glsl_type_is_array(type)) {
      /* Arrays are packed/unpacked by considering each array element in
       * sequence.
       */
      return lower_arraylike(state, rhs_swizzle, writemask, type, fine_location,
                             unpacked_var, unpacked_var_deref, name,
                             gs_input_toplevel, vertex_index);
   } else if (glsl_type_is_matrix(type)) {
      /* Matrices are packed/unpacked by considering each column vector in
       * sequence.
       */
      return lower_arraylike(state, rhs_swizzle, writemask, type, fine_location,
                             unpacked_var, unpacked_var_deref, name, false,
                             vertex_index);
   } else if (glsl_get_vector_elements(type) * dmul + fine_location % 4 > 4) {
      /* We don't have code to split up a 64-bit variable between two
       * varying slots; instead we add padding if necessary.
       */
      unsigned aligned_fine_location = ALIGN_POT(fine_location, dmul);
      if (aligned_fine_location != fine_location) {
         return lower_varying(state, rhs_swizzle, writemask, type,
                              aligned_fine_location, unpacked_var,
                              unpacked_var_deref, name, false, vertex_index);
      }

      /* This vector is going to be "double parked" across two varying slots,
       * so handle it as two separate assignments. For doubles, a dvec3/dvec4
       * can end up being spread over 3 slots. However, the second splitting
       * will happen later; here we just always want to split into 2.
       */
      unsigned left_components, right_components;
      unsigned left_swizzle_values[4] = { 0, 0, 0, 0 };
      unsigned right_swizzle_values[4] = { 0, 0, 0, 0 };
      char left_swizzle_name[4] = { 0, 0, 0, 0 };
      char right_swizzle_name[4] = { 0, 0, 0, 0 };

      left_components = 4 - fine_location % 4;
      if (glsl_type_is_64bit(type)) {
         left_components /= 2;
         assert(left_components > 0);
      }
      right_components = glsl_get_vector_elements(type) - left_components;

      /* If a writemask was previously set, use it to offset the following
       * swizzles/writemasks. This can happen when splitting a dvec, etc.,
       * across slots.
       */
      unsigned offset = 0;
      if (writemask) {
         for (unsigned i = 0; i < left_components; i++) {
            /* Keep going until we find the first component of the write */
            if (!(writemask & (1 << i))) {
               offset++;
            } else
               break;
         }
      }

      for (unsigned i = 0; i < left_components; i++) {
         left_swizzle_values[i] = i + offset;
         left_swizzle_name[i] = "xyzw"[i + offset];
      }
      for (unsigned i = 0; i < right_components; i++) {
         right_swizzle_values[i] = i + left_components + offset;
         right_swizzle_name[i] = "xyzw"[i + left_components + offset];
      }

      if (left_components) {
         char *left_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s.%s", name, left_swizzle_name) :
            NULL;

         nir_ssa_def *left_swizzle = NULL;
         unsigned left_writemask = ~0u;
         if (state->mode == nir_var_shader_out) {
            nir_ssa_def *ssa_def = rhs_swizzle ?
               rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
            left_swizzle =
               nir_swizzle(&state->b, ssa_def,
                           left_swizzle_values, left_components);
         } else {
            left_writemask = ((1 << left_components) - 1) << offset;
         }

         const struct glsl_type *swiz_type =
            glsl_vector_type(glsl_get_base_type(type), left_components);
         fine_location = lower_varying(state, left_swizzle, left_writemask, swiz_type,
                                       fine_location, unpacked_var, unpacked_var_deref,
                                       left_name, false, vertex_index);
      } else {
         /* Advance the fine location to the next slot */
         fine_location++;
      }

      char *right_name = name ?
         ralloc_asprintf(state->mem_ctx, "%s.%s", name, right_swizzle_name) :
         NULL;

      nir_ssa_def *right_swizzle = NULL;
      unsigned right_writemask = ~0u;
      if (state->mode == nir_var_shader_out) {
         nir_ssa_def *ssa_def = rhs_swizzle ?
            rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
         right_swizzle =
            nir_swizzle(&state->b, ssa_def,
                        right_swizzle_values, right_components);
      } else {
         right_writemask = ((1 << right_components) - 1) << (left_components + offset);
      }

      const struct glsl_type *swiz_type =
         glsl_vector_type(glsl_get_base_type(type), right_components);
      return lower_varying(state, right_swizzle, right_writemask, swiz_type,
                           fine_location, unpacked_var, unpacked_var_deref,
                           right_name, false, vertex_index);
   } else {
      /* No special handling is necessary; (un)pack the old varying (now temp)
       * from/into the new packed varying.
       */
      unsigned components = glsl_get_vector_elements(type) * dmul;
      unsigned location = fine_location / 4;
      unsigned location_frac = fine_location % 4;

      assert(state->components[location - VARYING_SLOT_VAR0] >= components);
      nir_deref_instr *packed_deref =
         get_packed_varying_deref(state, location, unpacked_var, name,
                                  vertex_index);

      nir_variable *packed_var =
         state->packed_varyings[location - VARYING_SLOT_VAR0];
      if (unpacked_var->data.stream != 0) {
         assert(unpacked_var->data.stream < 4);
         for (unsigned i = 0; i < components; ++i) {
            packed_var->data.stream |=
               unpacked_var->data.stream << (2 * (location_frac + i));
         }
      }

      struct packing_store_values *store_value;
      if (state->mode == nir_var_shader_out) {
         unsigned writemask = ((1 << components) - 1) << location_frac;
         nir_ssa_def *value = rhs_swizzle ? rhs_swizzle :
            nir_load_deref(&state->b, unpacked_var_deref);

         store_value =
            bitwise_assign_pack(state, packed_deref, unpacked_var_deref, type,
                                value, writemask);
      } else {
         unsigned swizzle_values[4] = { 0, 0, 0, 0 };
         for (unsigned i = 0; i < components; ++i) {
            swizzle_values[i] = i + location_frac;
         }

         nir_ssa_def *ssa_def = nir_load_deref(&state->b, packed_deref);
         nir_ssa_def *swizzle =
            nir_swizzle(&state->b, ssa_def, swizzle_values, components);

         store_value = bitwise_assign_unpack(state, unpacked_var_deref,
                                             packed_deref, type, swizzle,
                                             writemask);
      }

      create_store_deref(state, store_value->deref, store_value->values[0],
                         store_value->writemasks[0], store_value->is_64bit);
      if (store_value->is_64bit) {
         create_store_deref(state, store_value->deref, store_value->values[1],
                            store_value->writemasks[1], store_value->is_64bit);
      }

      free(store_value);
      return fine_location + components;
   }
}

/* Recursively pack varying. */
static void
pack_output_var(struct lower_packed_varyings_state *state, nir_variable *var)
{
   nir_deref_instr *unpacked_var_deref = nir_build_deref_var(&state->b, var);
   lower_varying(state, NULL, ~0u, var->type,
                 var->data.location * 4 + var->data.location_frac,
                 var, unpacked_var_deref, var->name,
                 state->gs_input_vertices != 0, 0);
}

static void
lower_output_var(struct lower_packed_varyings_state *state, nir_variable *var)
{
   if (var->data.mode != state->mode ||
       var->data.location < VARYING_SLOT_VAR0 || !needs_lowering(state, var))
      return;

   /* Skip any new packed varyings we just added */
   if (strncmp("packed:", var->name, 7) == 0)
      return;

   /* This lowering pass is only capable of packing floats and ints
    * together when their interpolation mode is "flat".  Treat integers as
    * being flat when the interpolation mode is none.
    */
   assert(var->data.interpolation == INTERP_MODE_FLAT ||
          var->data.interpolation == INTERP_MODE_NONE ||
          !glsl_contains_integer(var->type));

   if (state->prog->SeparateShader && state->ifc_exposed_to_query_api) {
      struct set *resource_set = _mesa_pointer_set_create(NULL);

      nir_add_packed_var_to_resource_list(state->consts, state->prog,
                                          resource_set, var,
                                          state->shader->info.stage,
                                          GL_PROGRAM_OUTPUT);

      _mesa_set_destroy(resource_set, NULL);
   }

   /* Change the old varying into an ordinary global. */
   var->data.mode = nir_var_shader_temp;

   nir_foreach_block(block, state->impl) {
      if (state->shader->info.stage != MESA_SHADER_GEOMETRY) {
         /* For shaders other than geometry, outputs need to be lowered before
          * each return statement and at the end of main()
          */
         if (nir_block_ends_in_return_or_halt(block)) {
            state->b.cursor = nir_before_instr(nir_block_last_instr(block));
            pack_output_var(state, var);
         } else if (block == nir_impl_last_block(state->impl)) {
            state->b.cursor = nir_after_block(block);
            pack_output_var(state, var);
         }
      } else {
         /* For geometry shaders, outputs need to be lowered before each call
          * to EmitVertex()
          */
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_emit_vertex)
               continue;

            state->b.cursor = nir_before_instr(instr);
            pack_output_var(state, var);
         }
      }
   }
}

static void
lower_packed_outputs(struct lower_packed_varyings_state *state)
{
   nir_foreach_shader_out_variable_safe(var, state->shader) {
      lower_output_var(state, var);
   }
}

static void
lower_packed_inputs(struct lower_packed_varyings_state *state)
{
   /* Shader inputs need to be lowered at the beginning of main() so set the
    * builder cursor to insert packing code at the start of the main function.
    */
   state->b.cursor = nir_before_block(nir_start_block(state->impl));

   /* Insert new varyings, lower old ones to locals and add unpacking code at
    * the start of the shader.
    */
   nir_foreach_shader_in_variable_safe(var, state->shader) {
      if (var->data.mode != state->mode ||
          var->data.location < VARYING_SLOT_VAR0 || !needs_lowering(state, var))
         continue;

      /* Skip any new packed varyings we just added */
      if (strncmp("packed:", var->name, 7) == 0)
         continue;

      /* This lowering pass is only capable of packing floats and ints
       * together when their interpolation mode is "flat".  Treat integers as
       * being flat when the interpolation mode is none.
       */
      assert(var->data.interpolation == INTERP_MODE_FLAT ||
             var->data.interpolation == INTERP_MODE_NONE ||
             !glsl_contains_integer(var->type));

      /* The program interface needs to expose varyings in case of SSO. Add
       * the variable to the program resource list before it gets modified
       * and lost.
       */
      if (state->prog->SeparateShader && state->ifc_exposed_to_query_api) {
         struct set *resource_set = _mesa_pointer_set_create(NULL);

         nir_add_packed_var_to_resource_list(state->consts, state->prog,
                                             resource_set, var,
                                             state->shader->info.stage,
                                             GL_PROGRAM_INPUT);

         _mesa_set_destroy(resource_set, NULL);
      }

      /* Change the old varying into an ordinary global. */
      var->data.mode = nir_var_shader_temp;

      /* Recursively unpack varying. */
      nir_deref_instr *unpacked_var_deref = nir_build_deref_var(&state->b, var);
      lower_varying(state, NULL, ~0u, var->type,
                    var->data.location * 4 + var->data.location_frac,
                    var, unpacked_var_deref, var->name,
                    state->gs_input_vertices != 0, 0);
   }
}

void
gl_nir_lower_packed_varyings(const struct gl_constants *consts,
                             struct gl_shader_program *prog,
                             void *mem_ctx, unsigned locations_used,
                             const uint8_t *components,
                             nir_variable_mode mode, unsigned gs_input_vertices,
                             struct gl_linked_shader *linked_shader,
                             bool disable_varying_packing,
                             bool disable_xfb_packing, bool xfb_enabled)
{
   struct lower_packed_varyings_state state;
   nir_shader *shader = linked_shader->Program->nir;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   assert(shader->info.stage != MESA_SHADER_COMPUTE);

   /* assert that functions have been inlined before packing is called */
   nir_foreach_function(f, shader) {
      assert(f->impl == impl);
   }

   nir_builder_init(&state.b, impl);
   state.consts = consts;
   state.prog = prog;
   state.mem_ctx = mem_ctx;
   state.shader = shader;
   state.impl = impl;
   state.locations_used = locations_used;
   state.components = components;
   state.mode = mode;
   state.gs_input_vertices = gs_input_vertices;
   state.disable_varying_packing = disable_varying_packing;
   state.disable_xfb_packing = disable_xfb_packing;
   state.xfb_enabled = xfb_enabled;
   state.packed_varyings =
      (nir_variable **) rzalloc_array_size(mem_ctx, sizeof(nir_variable *),
                                           locations_used);

   /* Determine if the shader interface is exposed to the API query. */
   struct gl_linked_shader *linked_shaders[MESA_SHADER_STAGES];
   unsigned num_shaders = 0;
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i])
         linked_shaders[num_shaders++] = prog->_LinkedShaders[i];
   }

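   /* Only the inputs of the first linked stage and the outputs of the last
    * linked stage form the program's external interface, so only those are
    * added to the resource list.
    */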
   if (mode == nir_var_shader_in) {
      state.ifc_exposed_to_query_api = linked_shaders[0] == linked_shader;
      lower_packed_inputs(&state);
   } else {
      state.ifc_exposed_to_query_api =
         linked_shaders[num_shaders - 1] == linked_shader;
      lower_packed_outputs(&state);
   }

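   /* The demoted varyings are now ordinary globals referenced only from the
    * entrypoint; turn them into function locals and fix up deref modes to
    * match.
    */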
   nir_lower_global_vars_to_local(shader);
   nir_fixup_deref_modes(shader);
}