/*
 * Copyright © 2011 Intel Corporation
 * Copyright © 2022 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * This lowering pass generates code that manually packs varyings into vec4
 * slots, for the benefit of back-ends that don't support packed varyings
 * natively. (The examples below use GLSL syntax for clarity; the pass itself
 * operates on NIR.)
 *
 * For example, the following shader:
 *
 *   out mat3x2 foo;  // location=4, location_frac=0
 *   out vec3 bar[2]; // location=5, location_frac=2
 *
 *   main()
 *   {
 *     ...
 *   }
 *
 * Is rewritten to:
 *
 *   mat3x2 foo;
 *   vec3 bar[2];
 *   out vec4 packed4; // location=4, location_frac=0
 *   out vec4 packed5; // location=5, location_frac=0
 *   out vec4 packed6; // location=6, location_frac=0
 *
 *   main()
 *   {
 *     ...
 *     packed4.xy = foo[0];
 *     packed4.zw = foo[1];
 *     packed5.xy = foo[2];
 *     packed5.zw = bar[0].xy;
 *     packed6.x = bar[0].z;
 *     packed6.yzw = bar[1];
 *   }
 *
 * This lowering pass properly handles "double parking" of a varying vector
 * across two varying slots. For example, in the code above, two of the
 * components of bar[0] are stored in packed5, and the remaining component is
 * stored in packed6.
 *
 * Note that in theory, the extra instructions may cause some loss of
 * performance. However, hopefully in most cases the performance loss will
 * either be absorbed by a later optimization pass, or it will be offset by
 * memory bandwidth savings (because fewer varyings are used).
 *
 * This lowering pass also packs flat floats, ints, and uints together, by
 * using ivec4 as the base type of flat "varyings", and using appropriate
 * casts to convert floats and uints into ints.
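 *
 * As a sketch (again in GLSL syntax; the component placement shown here is
 * only illustrative), a pair of flat varyings such as:
 *
 *   flat out uint u;  // location=4, location_frac=0
 *   flat out float f; // location=4, location_frac=1
 *
 * would conceptually become:
 *
 *   flat out ivec4 packed4; // location=4, location_frac=0
 *   ...
 *   packed4.x = int(u);
 *   packed4.y = floatBitsToInt(f);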
 *
 * This lowering pass also handles varyings whose type is a struct or an
 * array of structs. Structs are packed in order and with no gaps, so there
 * may be a performance penalty due to structure elements being double-parked.
 *
 * Lowering of geometry shader inputs is slightly more complex, since geometry
 * inputs are always arrays, so we need to lower arrays to arrays. For
 * example, the following input:
 *
 *   in struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];         // location=4, location_frac=0
 *
 * Would get lowered like this if it occurred in a fragment shader:
 *
 *   struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];
 *   in vec4 packed4;  // location=4, location_frac=0
 *   in vec4 packed5;  // location=5, location_frac=0
 *   in vec4 packed6;  // location=6, location_frac=0
 *   in vec4 packed7;  // location=7, location_frac=0
 *   in vec4 packed8;  // location=8, location_frac=0
 *   in vec4 packed9;  // location=9, location_frac=0
 *
 *   main()
 *   {
 *     arr[0].f = packed4.x;
 *     arr[0].v = packed4.yzw;
 *     arr[0].a[0] = packed5.xy;
 *     arr[0].a[1] = packed5.zw;
 *     arr[1].f = packed6.x;
 *     arr[1].v = packed6.yzw;
 *     arr[1].a[0] = packed7.xy;
 *     arr[1].a[1] = packed7.zw;
 *     arr[2].f = packed8.x;
 *     arr[2].v = packed8.yzw;
 *     arr[2].a[0] = packed9.xy;
 *     arr[2].a[1] = packed9.zw;
 *     ...
 *   }
 *
 * But it would get lowered like this if it occurred in a geometry shader:
 *
 *   struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];
 *   in vec4 packed4[3];  // location=4, location_frac=0
 *   in vec4 packed5[3];  // location=5, location_frac=0
 *
 *   main()
 *   {
 *     arr[0].f = packed4[0].x;
 *     arr[0].v = packed4[0].yzw;
 *     arr[0].a[0] = packed5[0].xy;
 *     arr[0].a[1] = packed5[0].zw;
 *     arr[1].f = packed4[1].x;
 *     arr[1].v = packed4[1].yzw;
 *     arr[1].a[0] = packed5[1].xy;
 *     arr[1].a[1] = packed5[1].zw;
 *     arr[2].f = packed4[2].x;
 *     arr[2].v = packed4[2].yzw;
 *     arr[2].a[0] = packed5[2].xy;
 *     arr[2].a[1] = packed5[2].zw;
 *     ...
 *   }
 */

#include "nir.h"
#include "nir_builder.h"
#include "gl_nir.h"
#include "gl_nir_linker.h"
#include "program/prog_instruction.h"
#include "main/mtypes.h"

/**
 * State for the pass that performs varying packing. For each varying
 * declared in the shader, the pass determines whether it needs to be packed.
 * If so, it demotes it to an ordinary global, creates new packed varyings,
 * and generates assignments to convert between the original varying and the
 * packed varying.
 */
struct lower_packed_varyings_state
{
   const struct gl_constants *consts;

   struct gl_shader_program *prog;

   /**
    * Memory context used to allocate new instructions for the shader.
    */
   void *mem_ctx;

   /**
    * Number of generic varying slots which are used by this shader. This is
    * used to allocate temporary intermediate data structures. If any varying
    * used by this shader has a location greater than or equal to
    * VARYING_SLOT_VAR0 + locations_used, an assertion will fire.
    */
   unsigned locations_used;

   const uint8_t *components;

   /**
    * Array of pointers to the packed varyings that have been created for each
    * generic varying slot. NULL entries in this array indicate varying slots
    * for which a packed varying has not been created yet.
    */
   nir_variable **packed_varyings;

   nir_shader *shader;

   nir_function_impl *impl;

   nir_builder b;

   /**
    * Type of varying which is being lowered in this pass (either
    * nir_var_shader_in or nir_var_shader_out).
    */
   nir_variable_mode mode;

   /**
    * If we are currently lowering geometry shader inputs, the number of input
    * vertices the geometry shader accepts. Otherwise zero.
    */
   unsigned gs_input_vertices;

   bool disable_varying_packing;
   bool disable_xfb_packing;
   bool xfb_enabled;
   bool ifc_exposed_to_query_api;
};

static bool
needs_lowering(struct lower_packed_varyings_state *state, nir_variable *var)
{
   /* Things composed of vec4's, varyings with explicitly assigned
    * locations or varyings marked as must_be_shader_input (which might be used
    * by interpolateAt* functions) shouldn't be lowered. Everything else can be.
    */
   if (var->data.explicit_location || var->data.must_be_shader_input)
      return false;

   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, state->shader->info.stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* Some drivers (e.g. panfrost) don't support packing of transform
    * feedback varyings.
    */
   if (state->disable_xfb_packing && var->data.is_xfb &&
       !(glsl_type_is_array(type) || glsl_type_is_struct(type) || glsl_type_is_matrix(type)) &&
       state->xfb_enabled)
      return false;

   /* Override disable_varying_packing if the var is only used by transform
    * feedback. Also override it if transform feedback is enabled and the
    * variable is an array, struct or matrix as the elements of these types
    * will always have the same interpolation and therefore are safe to pack.
    */
   if (state->disable_varying_packing && !var->data.is_xfb_only &&
       !((glsl_type_is_array(type) || glsl_type_is_struct(type) || glsl_type_is_matrix(type)) &&
         state->xfb_enabled))
      return false;

   type = glsl_without_array(type);
   if (glsl_get_vector_elements(type) == 4 && !glsl_type_is_64bit(type))
      return false;
   return true;
}

/**
 * If no packed varying has been created for the given varying location yet,
 * create it and add it to the shader.
 *
 * The newly created varying inherits its interpolation parameters from \c
 * unpacked_var. Its base type is ivec4 if we are lowering a flat varying,
 * vec4 otherwise.
 */
static void
create_or_update_packed_varying(struct lower_packed_varyings_state *state,
                                nir_variable *unpacked_var,
                                const char *name, unsigned location,
                                unsigned slot, unsigned vertex_index)
{
   assert(slot < state->locations_used);
   if (state->packed_varyings[slot] == NULL) {
      assert(state->components[slot] != 0);
      assert(name);

      nir_variable *packed_var = rzalloc(state->shader, nir_variable);
      packed_var->name = ralloc_asprintf(packed_var, "packed:%s", name);
      packed_var->data.mode = state->mode;

      bool is_interpolation_flat =
         unpacked_var->data.interpolation == INTERP_MODE_FLAT ||
         glsl_contains_integer(unpacked_var->type) ||
         glsl_contains_double(unpacked_var->type);

      const struct glsl_type *packed_type;
      if (is_interpolation_flat)
         packed_type = glsl_vector_type(GLSL_TYPE_INT, state->components[slot]);
      else
         packed_type = glsl_vector_type(GLSL_TYPE_FLOAT, state->components[slot]);

      if (state->gs_input_vertices != 0) {
         packed_type =
            glsl_array_type(packed_type, state->gs_input_vertices, 0);
      }

      packed_var->type = packed_type;
      packed_var->data.centroid = unpacked_var->data.centroid;
      packed_var->data.sample = unpacked_var->data.sample;
      packed_var->data.patch = unpacked_var->data.patch;
      packed_var->data.interpolation = is_interpolation_flat ?
         (unsigned) INTERP_MODE_FLAT : unpacked_var->data.interpolation;
      packed_var->data.location = location;
      packed_var->data.precision = unpacked_var->data.precision;
      packed_var->data.always_active_io = unpacked_var->data.always_active_io;
      packed_var->data.stream = NIR_STREAM_PACKED;

      nir_shader_add_variable(state->shader, packed_var);
      state->packed_varyings[slot] = packed_var;
   } else {
      nir_variable *var = state->packed_varyings[slot];

      /* The slot needs to be marked as always active if any variable that got
       * packed there was.
       */
      var->data.always_active_io |= unpacked_var->data.always_active_io;

      /* For geometry shader inputs, only update the packed variable name the
       * first time we visit each component.
       */
      if (state->gs_input_vertices == 0 || vertex_index == 0) {
         assert(name);
         ralloc_asprintf_append((char **) &var->name, ",%s", name);
      }
   }
}

/**
 * Retrieve the packed varying corresponding to the given varying location.
 *
 * \param vertex_index: if we are lowering geometry shader inputs, then this
 * indicates which vertex we are currently lowering. Otherwise it is ignored.
 */
static nir_deref_instr *
get_packed_varying_deref(struct lower_packed_varyings_state *state,
                         unsigned location, nir_variable *unpacked_var,
                         const char *name, unsigned vertex_index)
{
   unsigned slot = location - VARYING_SLOT_VAR0;
   assert(slot < state->locations_used);

   create_or_update_packed_varying(state, unpacked_var, name, location, slot,
                                   vertex_index);

   nir_deref_instr *deref =
      nir_build_deref_var(&state->b, state->packed_varyings[slot]);

   if (state->gs_input_vertices != 0) {
      /* When lowering GS inputs, the packed variable is an array, so we need
       * to dereference it using vertex_index.
       */
      nir_load_const_instr *c_idx =
         nir_load_const_instr_create(state->b.shader, 1, 32);
      c_idx->value[0].u32 = vertex_index;
      nir_builder_instr_insert(&state->b, &c_idx->instr);

      deref = nir_build_deref_array(&state->b, deref, &c_idx->def);
   }

   return deref;
}

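/* Conversion helpers between int and uint SSA values. NIR SSA values are
 * typeless, so for matching bit sizes these are effectively reinterpret
 * casts; they mainly keep the pack/unpack code below explicit about the
 * GLSL-level types involved.
 */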
static nir_ssa_def *
i2u(struct lower_packed_varyings_state *state, nir_ssa_def *value)
{
   value =
      nir_build_alu(&state->b,
                    nir_type_conversion_op(nir_type_int, nir_type_uint,
                                           nir_rounding_mode_undef),
                    value, NULL, NULL, NULL);
   return value;
}

static nir_ssa_def *
u2i(struct lower_packed_varyings_state *state, nir_ssa_def *value)
{
   value =
      nir_build_alu(&state->b,
                    nir_type_conversion_op(nir_type_uint, nir_type_int,
                                           nir_rounding_mode_undef),
                    value, NULL, NULL, NULL);
   return value;
}

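/* A deferred store produced by the bitwise_assign_* helpers: the deref to
 * store to, plus one value/writemask pair (or two pairs when a 64-bit
 * vector had to be split in half).
 */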
struct packing_store_values {
   bool is_64bit;
   unsigned writemasks[2];
   nir_ssa_def *values[2];
   nir_deref_instr *deref;
};

/**
 * Prepare a store of \c value into \c packed_deref, performing appropriate
 * bitcasts if necessary to match up types.
 *
 * This function is called when packing varyings.
 */
static struct packing_store_values *
bitwise_assign_pack(struct lower_packed_varyings_state *state,
                    nir_deref_instr *packed_deref,
                    nir_deref_instr *unpacked_deref,
                    const struct glsl_type *unpacked_type,
                    nir_ssa_def *value,
                    unsigned writemask)

{
   nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);

   enum glsl_base_type packed_base_type = glsl_get_base_type(packed_var->type);
   enum glsl_base_type unpacked_base_type = glsl_get_base_type(unpacked_type);

   struct packing_store_values *store_state =
      calloc(1, sizeof(struct packing_store_values));

   if (unpacked_base_type != packed_base_type) {
      /* Since we only mix types in flat varyings, and we always store flat
       * varyings as type ivec4, we need only produce conversions from (uint
       * or float) to int.
       */
      assert(packed_base_type == GLSL_TYPE_INT);
      switch (unpacked_base_type) {
      case GLSL_TYPE_UINT:
         value = u2i(state, value);
         break;
      case GLSL_TYPE_FLOAT:
         value = nir_mov(&state->b, value);
         break;
      case GLSL_TYPE_DOUBLE:
      case GLSL_TYPE_UINT64:
      case GLSL_TYPE_INT64:
         assert(glsl_get_vector_elements(unpacked_type) <= 2);
         if (glsl_get_vector_elements(unpacked_type) == 2) {
            assert(glsl_get_vector_elements(packed_var->type) == 4);

            unsigned swiz_x = 0;
            unsigned writemask = 0x3;
            nir_ssa_def *swizzle = nir_swizzle(&state->b, value, &swiz_x, 1);
            nir_ssa_def *x_value = nir_unpack_64_2x32(&state->b, swizzle);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               x_value = u2i(state, x_value);

            store_state->is_64bit = true;
            store_state->deref = packed_deref;
            store_state->values[0] = x_value;
            store_state->writemasks[0] = writemask;

            unsigned swiz_y = 1;
            writemask = 0xc;
            swizzle = nir_swizzle(&state->b, value, &swiz_y, 1);
            nir_ssa_def *y_value = nir_unpack_64_2x32(&state->b, swizzle);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               y_value = u2i(state, y_value);

            store_state->deref = packed_deref;
            store_state->values[1] = y_value;
            store_state->writemasks[1] = writemask;
            return store_state;
         } else {
            value = nir_unpack_64_2x32(&state->b, value);

            if (unpacked_base_type != GLSL_TYPE_INT64)
               value = u2i(state, value);
         }
         break;
      case GLSL_TYPE_SAMPLER:
      case GLSL_TYPE_IMAGE:
         value = u2i(state, nir_unpack_64_2x32(&state->b, value));
         break;
      default:
         assert(!"Unexpected type conversion while lowering varyings");
         break;
      }
   }

   store_state->deref = packed_deref;
   store_state->values[0] = value;
   store_state->writemasks[0] = writemask;

   return store_state;
}

/**
 * This function is called when unpacking varyings.
 */
static struct packing_store_values *
bitwise_assign_unpack(struct lower_packed_varyings_state *state,
                      nir_deref_instr *unpacked_deref,
                      nir_deref_instr *packed_deref,
                      const struct glsl_type *unpacked_type,
                      nir_ssa_def *value, unsigned writemask)
{
   nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);

   const struct glsl_type *packed_type = glsl_without_array(packed_var->type);
   enum glsl_base_type packed_base_type = glsl_get_base_type(packed_type);
   enum glsl_base_type unpacked_base_type = glsl_get_base_type(unpacked_type);

   struct packing_store_values *store_state =
      calloc(1, sizeof(struct packing_store_values));

   if (unpacked_base_type != packed_base_type) {
      /* Since we only mix types in flat varyings, and we always store flat
       * varyings as type ivec4, we need only produce conversions from int to
       * (uint or float).
       */
      assert(packed_base_type == GLSL_TYPE_INT);

      switch (unpacked_base_type) {
      case GLSL_TYPE_UINT:
         value = i2u(state, value);
         break;
      case GLSL_TYPE_FLOAT:
         value = nir_mov(&state->b, value);
         break;
      case GLSL_TYPE_DOUBLE:
      case GLSL_TYPE_UINT64:
      case GLSL_TYPE_INT64:
         assert(glsl_get_vector_elements(unpacked_type) <= 2);
         if (glsl_get_vector_elements(unpacked_type) == 2) {
            assert(glsl_get_vector_elements(packed_type) == 4);

            unsigned swiz_xy[2] = {0, 1};
            writemask = 1 << (ffs(writemask) - 1);
            nir_ssa_def *xy_value = nir_swizzle(&state->b, value, swiz_xy, 2);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               xy_value = i2u(state, xy_value);

            xy_value = nir_pack_64_2x32(&state->b, xy_value);
            store_state->is_64bit = true;
            store_state->deref = unpacked_deref;
            store_state->values[0] = xy_value;
            store_state->writemasks[0] = writemask;

            unsigned swiz_zw[2] = {2, 3};
            writemask = writemask << 1;
            nir_ssa_def *zw_value = nir_swizzle(&state->b, value, swiz_zw, 2);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               zw_value = i2u(state, zw_value);

            zw_value = nir_pack_64_2x32(&state->b, zw_value);
            store_state->deref = unpacked_deref;
            store_state->values[1] = zw_value;
            store_state->writemasks[1] = writemask;

            return store_state;
         } else {
            if (unpacked_base_type != GLSL_TYPE_INT64)
               value = i2u(state, value);

            value = nir_pack_64_2x32(&state->b, value);
         }
         break;
      case GLSL_TYPE_SAMPLER:
      case GLSL_TYPE_IMAGE:
         value = nir_pack_64_2x32(&state->b, i2u(state, value));
         break;
      default:
         assert(!"Unexpected type conversion while lowering varyings");
         break;
      }
   }

   store_state->deref = unpacked_deref;
   store_state->values[0] = value;
   store_state->writemasks[0] = writemask;

   return store_state;
}

static void
create_store_deref(struct lower_packed_varyings_state *state,
                   nir_deref_instr *deref, nir_ssa_def *value,
                   unsigned writemask, bool is_64bit)
{
   /* If dest and value have different numbers of components, pack the srcs
    * into a vector.
    */
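   /* Components not selected by the writemask are filled with undefs; the
    * store's writemask ensures they are never actually written.
    */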
   const struct glsl_type *type = glsl_without_array(deref->type);
   unsigned comps = glsl_get_vector_elements(type);
   if (value->num_components != comps) {
      nir_ssa_def *srcs[4];

      unsigned comp = 0;
      for (unsigned i = 0; i < comps; i++) {
         if (writemask & (1 << i)) {
            if (is_64bit && state->mode == nir_var_shader_in)
               srcs[i] = value;
            else
               srcs[i] = nir_swizzle(&state->b, value, &comp, 1);
            comp++;
         } else {
            srcs[i] = nir_ssa_undef(&state->b, 1,
                                    glsl_type_is_64bit(type) ? 64 : 32);
         }
      }
      value = nir_vec(&state->b, srcs, comps);
   }

   nir_store_deref(&state->b, deref, value, writemask);
}

static unsigned
lower_varying(struct lower_packed_varyings_state *state,
              nir_ssa_def *rhs_swizzle, unsigned writemask,
              const struct glsl_type *type, unsigned fine_location,
              nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
              const char *name, bool gs_input_toplevel, unsigned vertex_index);

/**
 * Recursively pack or unpack a varying for which we need to iterate over its
 * constituent elements.
 * This takes care of both arrays and matrices.
 *
 * \param gs_input_toplevel should be set to true if we are lowering geometry
 * shader inputs, and we are currently lowering the whole input variable
 * (i.e. we are lowering the array whose index selects the vertex).
 *
 * \param vertex_index: if we are lowering geometry shader inputs, and the
 * level of the array that we are currently lowering is *not* the top level,
 * then this indicates which vertex we are currently lowering. Otherwise it
 * is ignored.
 */
static unsigned
lower_arraylike(struct lower_packed_varyings_state *state,
                nir_ssa_def *rhs_swizzle, unsigned writemask,
                const struct glsl_type *type, unsigned fine_location,
                nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
                const char *name, bool gs_input_toplevel, unsigned vertex_index)
{
   unsigned array_size = glsl_get_length(type);
   unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
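   /* If the array will not fit in the remainder of the current slot, round
    * fine_location up so that 64-bit elements start on an even component
    * (ALIGN_POT is a no-op for 32-bit types, where dmul is 1).
    */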
   if (array_size * dmul + fine_location % 4 > 4) {
      fine_location = ALIGN_POT(fine_location, dmul);
   }

   type = glsl_get_array_element(type);
   for (unsigned i = 0; i < array_size; i++) {
      nir_load_const_instr *c_idx =
         nir_load_const_instr_create(state->b.shader, 1, 32);
      c_idx->value[0].u32 = i;
      nir_builder_instr_insert(&state->b, &c_idx->instr);

      nir_deref_instr *unpacked_array_deref =
         nir_build_deref_array(&state->b, unpacked_var_deref, &c_idx->def);

      if (gs_input_toplevel) {
         /* Geometry shader inputs are a special case. Instead of storing
          * each element of the array at a different location, all elements
          * are at the same location, but with a different vertex index.
          */
         (void) lower_varying(state, rhs_swizzle, writemask, type, fine_location,
                              unpacked_var, unpacked_array_deref, name, false, i);
      } else {
         char *subscripted_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s[%d]", name, i) : NULL;
         fine_location =
            lower_varying(state, rhs_swizzle, writemask, type, fine_location,
                          unpacked_var, unpacked_array_deref,
                          subscripted_name, false, vertex_index);
      }
   }

   return fine_location;
}

/**
 * Recursively pack or unpack the given varying (or portion of a varying) by
 * traversing all of its constituent vectors.
 *
 * \param fine_location is the location where the first constituent vector
 * should be packed--the word "fine" indicates that this location is expressed
 * in multiples of a float, rather than multiples of a vec4 as is used
 * elsewhere in Mesa.
 *
 * \param gs_input_toplevel should be set to true if we are lowering geometry
 * shader inputs, and we are currently lowering the whole input variable
 * (i.e. we are lowering the array whose index selects the vertex).
 *
 * \param vertex_index: if we are lowering geometry shader inputs, and the
 * level of the array that we are currently lowering is *not* the top level,
 * then this indicates which vertex we are currently lowering. Otherwise it
 * is ignored.
 *
 * \return the location where the next constituent vector (after this one)
 * should be packed.
 */
static unsigned
lower_varying(struct lower_packed_varyings_state *state,
              nir_ssa_def *rhs_swizzle, unsigned writemask,
              const struct glsl_type *type, unsigned fine_location,
              nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
              const char *name, bool gs_input_toplevel, unsigned vertex_index)
{
   unsigned dmul = glsl_type_is_64bit(type) ? 2 : 1;
   /* When gs_input_toplevel is set, we should be looking at a geometry shader
    * input array.
    */
   assert(!gs_input_toplevel || glsl_type_is_array(type));

   if (glsl_type_is_struct(type)) {
      unsigned struct_len = glsl_get_length(type);
      for (unsigned i = 0; i < struct_len; i++) {
         const char *field_name = glsl_get_struct_elem_name(type, i);
         char *deref_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s.%s", name, field_name) :
            NULL;
         const struct glsl_type *field_type = glsl_get_struct_field(type, i);

         nir_deref_instr *unpacked_struct_deref =
            nir_build_deref_struct(&state->b, unpacked_var_deref, i);
         fine_location = lower_varying(state, rhs_swizzle, writemask, field_type,
                                       fine_location, unpacked_var,
                                       unpacked_struct_deref, deref_name,
                                       false, vertex_index);
      }

      return fine_location;
   } else if (glsl_type_is_array(type)) {
      /* Arrays are packed/unpacked by considering each array element in
       * sequence.
       */
      return lower_arraylike(state, rhs_swizzle, writemask, type, fine_location,
                             unpacked_var, unpacked_var_deref, name,
                             gs_input_toplevel, vertex_index);
   } else if (glsl_type_is_matrix(type)) {
      /* Matrices are packed/unpacked by considering each column vector in
       * sequence.
       */
      return lower_arraylike(state, rhs_swizzle, writemask, type, fine_location,
                             unpacked_var, unpacked_var_deref, name, false,
                             vertex_index);
   } else if (glsl_get_vector_elements(type) * dmul + fine_location % 4 > 4) {
      /* We don't have code to split a 64-bit variable between two varying
       * slots, so instead we add padding if necessary.
       */
      unsigned aligned_fine_location = ALIGN_POT(fine_location, dmul);
      if (aligned_fine_location != fine_location) {
         return lower_varying(state, rhs_swizzle, writemask, type,
                              aligned_fine_location, unpacked_var,
                              unpacked_var_deref, name, false, vertex_index);
      }

      /* This vector is going to be "double parked" across two varying slots,
       * so handle it as two separate assignments. For doubles, a dvec3/dvec4
       * can end up being spread over 3 slots. However the second splitting
       * will happen later, here we just always want to split into 2.
       */
      unsigned left_components, right_components;
      unsigned left_swizzle_values[4] = { 0, 0, 0, 0 };
      unsigned right_swizzle_values[4] = { 0, 0, 0, 0 };
      char left_swizzle_name[4] = { 0, 0, 0, 0 };
      char right_swizzle_name[4] = { 0, 0, 0, 0 };

      left_components = 4 - fine_location % 4;
      if (glsl_type_is_64bit(type)) {
         left_components /= 2;
         assert(left_components > 0);
      }
      right_components = glsl_get_vector_elements(type) - left_components;

      /* If set, use the previously set writemask to offset the following
       * swizzles/writemasks. This can happen when splitting a dvec, etc.,
       * across slots.
       */
      unsigned offset = 0;
      if (writemask) {
         for (unsigned i = 0; i < left_components; i++) {
            /* Keep going until we find the first component of the write */
            if (!(writemask & (1 << i))) {
               offset++;
            } else
               break;
         }
      }

      for (unsigned i = 0; i < left_components; i++) {
         left_swizzle_values[i] = i + offset;
         left_swizzle_name[i] = "xyzw"[i + offset];
      }
      for (unsigned i = 0; i < right_components; i++) {
         right_swizzle_values[i] = i + left_components + offset;
         right_swizzle_name[i] = "xyzw"[i + left_components + offset];
      }

      if (left_components) {
         char *left_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s.%s", name, left_swizzle_name) :
            NULL;

         nir_ssa_def *left_swizzle = NULL;
         unsigned left_writemask = ~0u;
         if (state->mode == nir_var_shader_out) {
            nir_ssa_def *ssa_def = rhs_swizzle ?
               rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
            left_swizzle =
               nir_swizzle(&state->b, ssa_def,
                           left_swizzle_values, left_components);
         } else {
            left_writemask = ((1 << left_components) - 1) << offset;
         }

         const struct glsl_type *swiz_type =
            glsl_vector_type(glsl_get_base_type(type), left_components);
         fine_location = lower_varying(state, left_swizzle, left_writemask, swiz_type,
                                       fine_location, unpacked_var, unpacked_var_deref,
                                       left_name, false, vertex_index);
      } else {
         /* Top up the fine location to the next slot */
         fine_location++;
      }

      char *right_name = name ?
         ralloc_asprintf(state->mem_ctx, "%s.%s", name, right_swizzle_name) :
         NULL;

      nir_ssa_def *right_swizzle = NULL;
      unsigned right_writemask = ~0u;
      if (state->mode == nir_var_shader_out) {
         nir_ssa_def *ssa_def = rhs_swizzle ?
            rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
         right_swizzle =
            nir_swizzle(&state->b, ssa_def,
                        right_swizzle_values, right_components);
      } else {
         right_writemask = ((1 << right_components) - 1) << (left_components + offset);
      }

      const struct glsl_type *swiz_type =
         glsl_vector_type(glsl_get_base_type(type), right_components);
      return lower_varying(state, right_swizzle, right_writemask, swiz_type,
                           fine_location, unpacked_var, unpacked_var_deref,
                           right_name, false, vertex_index);
   } else {
      /* No special handling is necessary; (un)pack the old varying (now temp)
       * from/into the new packed varying.
       */
      unsigned components = glsl_get_vector_elements(type) * dmul;
      unsigned location = fine_location / 4;
      unsigned location_frac = fine_location % 4;

      assert(state->components[location - VARYING_SLOT_VAR0] >= components);
      nir_deref_instr *packed_deref =
         get_packed_varying_deref(state, location, unpacked_var, name,
                                  vertex_index);

      nir_variable *packed_var =
         state->packed_varyings[location - VARYING_SLOT_VAR0];
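      /* With NIR_STREAM_PACKED set on the packed variable, data.stream holds
       * 2 bits per component, so fold the unpacked variable's stream id into
       * the bits covering the components packed here.
       */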
      if (unpacked_var->data.stream != 0) {
         assert(unpacked_var->data.stream < 4);
         for (unsigned i = 0; i < components; ++i) {
            packed_var->data.stream |=
               unpacked_var->data.stream << (2 * (location_frac + i));
         }
      }

      struct packing_store_values *store_value;
      if (state->mode == nir_var_shader_out) {
         unsigned writemask = ((1 << components) - 1) << location_frac;
         nir_ssa_def *value = rhs_swizzle ? rhs_swizzle :
            nir_load_deref(&state->b, unpacked_var_deref);

         store_value =
            bitwise_assign_pack(state, packed_deref, unpacked_var_deref, type,
                                value, writemask);
      } else {
         unsigned swizzle_values[4] = { 0, 0, 0, 0 };
         for (unsigned i = 0; i < components; ++i) {
            swizzle_values[i] = i + location_frac;
         }

         nir_ssa_def *ssa_def = nir_load_deref(&state->b, packed_deref);
         nir_ssa_def *swizzle =
            nir_swizzle(&state->b, ssa_def, swizzle_values, components);

         store_value = bitwise_assign_unpack(state, unpacked_var_deref,
                                             packed_deref, type, swizzle,
                                             writemask);
      }

      create_store_deref(state, store_value->deref, store_value->values[0],
                         store_value->writemasks[0], store_value->is_64bit);
      if (store_value->is_64bit) {
         create_store_deref(state, store_value->deref, store_value->values[1],
                            store_value->writemasks[1], store_value->is_64bit);
      }

      free(store_value);
      return fine_location + components;
   }
}

/* Recursively pack varying. */
static void
pack_output_var(struct lower_packed_varyings_state *state, nir_variable *var)
{
   nir_deref_instr *unpacked_var_deref = nir_build_deref_var(&state->b, var);
   lower_varying(state, NULL, ~0u, var->type,
                 var->data.location * 4 + var->data.location_frac,
                 var, unpacked_var_deref, var->name,
                 state->gs_input_vertices != 0, 0);
}

static void
lower_output_var(struct lower_packed_varyings_state *state, nir_variable *var)
{
   if (var->data.mode != state->mode ||
       var->data.location < VARYING_SLOT_VAR0 || !needs_lowering(state, var))
      return;

   /* Skip any new packed varyings we just added */
   if (strncmp("packed:", var->name, 7) == 0)
      return;

   /* This lowering pass is only capable of packing floats and ints
    * together when their interpolation mode is "flat". Treat integers as
    * being flat when the interpolation mode is none.
    */
   assert(var->data.interpolation == INTERP_MODE_FLAT ||
          var->data.interpolation == INTERP_MODE_NONE ||
          !glsl_contains_integer(var->type));

   if (state->prog->SeparateShader && state->ifc_exposed_to_query_api) {
      struct set *resource_set = _mesa_pointer_set_create(NULL);

      nir_add_packed_var_to_resource_list(state->consts, state->prog,
                                          resource_set, var,
                                          state->shader->info.stage,
                                          GL_PROGRAM_OUTPUT);

      _mesa_set_destroy(resource_set, NULL);
   }

   /* Change the old varying into an ordinary global. */
   var->data.mode = nir_var_shader_temp;

   nir_foreach_block(block, state->impl) {
      if (state->shader->info.stage != MESA_SHADER_GEOMETRY) {
         /* For shaders other than geometry, outputs need to be lowered before
          * each return statement and at the end of main()
          */
         if (nir_block_ends_in_return_or_halt(block)) {
            state->b.cursor = nir_before_instr(nir_block_last_instr(block));
            pack_output_var(state, var);
         } else if (block == nir_impl_last_block(state->impl)) {
            state->b.cursor = nir_after_block(block);
            pack_output_var(state, var);
         }
      } else {
         /* For geometry shaders, outputs need to be lowered before each call
          * to EmitVertex()
          */
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_emit_vertex)
               continue;

            state->b.cursor = nir_before_instr(instr);
            pack_output_var(state, var);
         }
      }
   }
}

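/* Lower every candidate output varying: demote it to a global and insert the
 * packing code at the points chosen by lower_output_var() above.
 */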
static void
lower_packed_outputs(struct lower_packed_varyings_state *state)
{
   nir_foreach_shader_out_variable_safe(var, state->shader) {
      lower_output_var(state, var);
   }
}

static void
lower_packed_inputs(struct lower_packed_varyings_state *state)
{
   /* Shader inputs need to be lowered at the beginning of main() so set the
    * builder cursor to insert unpacking code at the start of the main function.
    */
   state->b.cursor = nir_before_block(nir_start_block(state->impl));

   /* Insert new varyings, lower old ones to locals and add unpacking code at
    * the start of the shader.
    */
   nir_foreach_shader_in_variable_safe(var, state->shader) {
      if (var->data.mode != state->mode ||
          var->data.location < VARYING_SLOT_VAR0 || !needs_lowering(state, var))
         continue;

      /* Skip any new packed varyings we just added */
      if (strncmp("packed:", var->name, 7) == 0)
         continue;

      /* This lowering pass is only capable of packing floats and ints
       * together when their interpolation mode is "flat". Treat integers as
       * being flat when the interpolation mode is none.
       */
      assert(var->data.interpolation == INTERP_MODE_FLAT ||
             var->data.interpolation == INTERP_MODE_NONE ||
             !glsl_contains_integer(var->type));

      /* Program interface needs to expose varyings in case of SSO. Add the
       * variable to the program resource list before it gets modified and lost.
       */
      if (state->prog->SeparateShader && state->ifc_exposed_to_query_api) {
         struct set *resource_set = _mesa_pointer_set_create(NULL);

         nir_add_packed_var_to_resource_list(state->consts, state->prog,
                                             resource_set, var,
                                             state->shader->info.stage,
                                             GL_PROGRAM_INPUT);

         _mesa_set_destroy(resource_set, NULL);
      }

      /* Change the old varying into an ordinary global. */
      var->data.mode = nir_var_shader_temp;

      /* Recursively unpack varying. */
      nir_deref_instr *unpacked_var_deref = nir_build_deref_var(&state->b, var);
      lower_varying(state, NULL, ~0u, var->type,
                    var->data.location * 4 + var->data.location_frac,
                    var, unpacked_var_deref, var->name,
                    state->gs_input_vertices != 0, 0);
   }
}

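/**
 * Entry point: pack the generic (VARYING_SLOT_VAR*) inputs or outputs of
 * \c linked_shader, selected by \c mode, into vec4/ivec4 slots. The original
 * varyings are demoted to globals (and then to locals), and packing or
 * unpacking code is inserted to copy between them and the new packed
 * varyings.
 */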
void
gl_nir_lower_packed_varyings(const struct gl_constants *consts,
                             struct gl_shader_program *prog,
                             void *mem_ctx, unsigned locations_used,
                             const uint8_t *components,
                             nir_variable_mode mode, unsigned gs_input_vertices,
                             struct gl_linked_shader *linked_shader,
                             bool disable_varying_packing,
                             bool disable_xfb_packing, bool xfb_enabled)
{
   struct lower_packed_varyings_state state;
   nir_shader *shader = linked_shader->Program->nir;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   assert(shader->info.stage != MESA_SHADER_COMPUTE);

   /* assert that functions have been inlined before packing is called */
   nir_foreach_function(f, shader) {
      assert(f->impl == impl);
   }

   nir_builder_init(&state.b, impl);
   state.consts = consts;
   state.prog = prog;
   state.mem_ctx = mem_ctx;
   state.shader = shader;
   state.impl = impl;
   state.locations_used = locations_used;
   state.components = components;
   state.mode = mode;
   state.gs_input_vertices = gs_input_vertices;
   state.disable_varying_packing = disable_varying_packing;
   state.disable_xfb_packing = disable_xfb_packing;
   state.xfb_enabled = xfb_enabled;
   state.packed_varyings =
      (nir_variable **) rzalloc_array_size(mem_ctx, sizeof(nir_variable *),
                                           locations_used);

   /* Determine if the shader interface is exposed to api query */
   struct gl_linked_shader *linked_shaders[MESA_SHADER_STAGES];
   unsigned num_shaders = 0;
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i])
         linked_shaders[num_shaders++] = prog->_LinkedShaders[i];
   }

   if (mode == nir_var_shader_in) {
      state.ifc_exposed_to_query_api = linked_shaders[0] == linked_shader;
      lower_packed_inputs(&state);
   } else {
      state.ifc_exposed_to_query_api =
         linked_shaders[num_shaders - 1] == linked_shader;
      lower_packed_outputs(&state);
   }

   nir_lower_global_vars_to_local(shader);
   nir_fixup_deref_modes(shader);
}