• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 
/**
 * This file contains two different lowering passes.
 *
 * 1. nir_lower_clip_cull_distance_arrays()
 *
 *    This pass combines clip and cull distance arrays in separate locations
 *    and colocates them both in VARYING_SLOT_CLIP_DIST0.  It does so by
 *    maintaining two arrays but making them compact and using location_frac
 *    to stack them on top of each other.
 *
 * 2. nir_lower_clip_cull_distance_to_vec4s()
 *
 *    This pass accounts for the difference between the way
 *    gl_ClipDistance is declared in standard GLSL (as an array of
 *    floats), and the way it is frequently implemented in hardware (as
 *    a pair of vec4s, with four clip distances packed into each).
 *
 *    The declaration of gl_ClipDistance is replaced with a declaration
 *    of gl_ClipDistanceMESA, and any references to gl_ClipDistance are
 *    translated to refer to gl_ClipDistanceMESA with the appropriate
 *    swizzling of array indices.  For instance:
 *
 *      gl_ClipDistance[i]
 *
 *    is translated into:
 *
 *      gl_ClipDistanceMESA[i>>2][i&3]
 *
 *    gl_CullDistance is lowered into the same gl_ClipDistanceMESA array;
 *    its indices are first offset by the length of the clip distance array.
 */
56 
57 #define GLSL_CLIP_VAR_NAME "gl_ClipDistanceMESA"
58 
59 struct lower_distance_state {
60    /**
61     * Pointer to the declaration of gl_ClipDistance, if found.
62     *
63     * Note:
64     *
65     * - the in_var is for geometry and both tessellation shader inputs only.
66     *
67     * - since gl_ClipDistance is available in tessellation control,
68     *   tessellation evaluation and geometry shaders as both an input
69     *   and an output, it's possible for both old_distance_out_var
70     *   and old_distance_in_var to be non-null.
71     */
72    nir_variable *old_distance_out_var;
73    nir_variable *old_distance_in_var;
74 
75    /**
76     * Pointer to the newly-created gl_ClipDistanceMESA variable.
77     */
78    nir_variable *new_distance_out_var;
79    nir_variable *new_distance_in_var;
80 
81    /**
82     * Type of shader we are compiling (e.g. MESA_SHADER_VERTEX)
83     */
84    gl_shader_stage shader_stage;
85    const char *in_name;
86    int total_size;
87    int offset;
88 };
89 
90 /**
91  * Get the length of the clip/cull distance array, looking past
92  * any interface block arrays.
93  */
94 static unsigned
get_unwrapped_array_length(nir_shader * nir,nir_variable * var)95 get_unwrapped_array_length(nir_shader *nir, nir_variable *var)
96 {
97    if (!var)
98       return 0;
99 
100    /* Unwrap GS input and TCS input/output interfaces.  We want the
101     * underlying clip/cull distance array length, not the per-vertex
102     * array length.
103     */
104    const struct glsl_type *type = var->type;
105    if (nir_is_arrayed_io(var, nir->info.stage))
106       type = glsl_get_array_element(type);
107 
108    assert(glsl_type_is_array(type));
109 
110    return glsl_get_length(type);
111 }
112 
113 /**
114  * Replace any declaration of 'in_name' as an array of floats with a
115  * declaration of gl_ClipDistanceMESA as an array of vec4's.
116  */
117 static void
replace_var_declaration(struct lower_distance_state * state,nir_shader * sh,nir_variable * var,const char * in_name)118 replace_var_declaration(struct lower_distance_state *state, nir_shader *sh,
119                         nir_variable *var, const char *in_name)
120 {
121    nir_variable **old_var;
122    nir_variable **new_var;
123 
124    if (!var->name || strcmp(var->name, in_name) != 0)
125       return;
126 
127    assert(glsl_type_is_array(var->type));
128    if (var->data.mode == nir_var_shader_out) {
129       if (state->old_distance_out_var)
130          return;
131 
132       old_var = &state->old_distance_out_var;
133       new_var = &state->new_distance_out_var;
134    } else if (var->data.mode == nir_var_shader_in) {
135       if (state->old_distance_in_var)
136          return;
137 
138       old_var = &state->old_distance_in_var;
139       new_var = &state->new_distance_in_var;
140    } else {
141       unreachable("not reached");
142    }
143 
144    *old_var = var;
145 
146    if (!(*new_var)) {
147       unsigned new_size = (state->total_size + 3) / 4;
148 
149       *new_var = rzalloc(sh, nir_variable);
150       (*new_var)->name = ralloc_strdup(*new_var, GLSL_CLIP_VAR_NAME);
151       (*new_var)->data.mode = var->data.mode;
152       (*new_var)->data.location = VARYING_SLOT_CLIP_DIST0;
153       (*new_var)->data.assigned = true;
154       (*new_var)->data.how_declared = var->data.how_declared;
155 
156       nir_shader_add_variable(sh, *new_var);
157 
158       if (!glsl_type_is_array(glsl_get_array_element(var->type))) {
159          /* gl_ClipDistance (used for vertex, tessellation evaluation and
160           * geometry output, and fragment input).
161           */
162          assert((var->data.mode == nir_var_shader_in &&
163                  sh->info.stage == MESA_SHADER_FRAGMENT) ||
164                 (var->data.mode == nir_var_shader_out &&
165                  (sh->info.stage == MESA_SHADER_VERTEX ||
166                   sh->info.stage == MESA_SHADER_TESS_EVAL ||
167                   sh->info.stage == MESA_SHADER_GEOMETRY)));
168 
169          assert(glsl_get_base_type(glsl_get_array_element(var->type)) ==
170                 GLSL_TYPE_FLOAT);
171 
172          /* And change the properties that we need to change */
173          (*new_var)->type = glsl_array_type(glsl_vec4_type(), new_size, 0);
174       } else {
175          /* 2D gl_ClipDistance (used for tessellation control, tessellation
176           * evaluation and geometry input, and tessellation control output).
177           */
178          assert((var->data.mode == nir_var_shader_in &&
179                  (sh->info.stage == MESA_SHADER_GEOMETRY ||
180                   sh->info.stage == MESA_SHADER_TESS_EVAL)) ||
181                 sh->info.stage == MESA_SHADER_TESS_CTRL);
182 
183          assert (glsl_get_base_type(glsl_get_array_element(glsl_get_array_element(var->type))) ==
184                  GLSL_TYPE_FLOAT);
185 
186          /* And change the properties that we need to change */
187          (*new_var)->type =
188             glsl_array_type(glsl_array_type(glsl_vec4_type(), new_size, 0),
189                             glsl_array_size(var->type), 0);
190       }
191    }
192 }
193 
194 static nir_def *
interp_deref(nir_builder * b,nir_intrinsic_instr * old_intrin,nir_deref_instr * deref)195 interp_deref(nir_builder *b, nir_intrinsic_instr *old_intrin,
196              nir_deref_instr *deref)
197 {
198    nir_intrinsic_instr *intrin =
199       nir_intrinsic_instr_create(b->shader, old_intrin->intrinsic);
200    intrin->num_components = 4;
201    intrin->src[0] = nir_src_for_ssa(&deref->def);
202 
203    if (intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
204        intrin->intrinsic == nir_intrinsic_interp_deref_at_sample)
205       intrin->src[1] = nir_src_for_ssa(old_intrin->src[1].ssa);
206 
207    nir_def_init(&intrin->instr, &intrin->def, 4, 32);
208    nir_builder_instr_insert(b, &intrin->instr);
209 
210    return &intrin->def;
211 }
212 
213 /* Replace any expression that indexes one of the floats in gl_ClipDistance
214  * with an expression that indexes into one of the vec4's in
215  * gl_ClipDistanceMESA and accesses the appropriate component.
216  */
217 static void
lower_distance_deref(struct lower_distance_state * state,nir_builder * b,nir_intrinsic_instr * intrin,nir_deref_instr * deref,nir_variable * new_var)218 lower_distance_deref(struct lower_distance_state *state, nir_builder *b,
219                      nir_intrinsic_instr *intrin, nir_deref_instr *deref,
220                      nir_variable *new_var)
221 {
222    nir_deref_path path;
223    nir_deref_path_init(&path, deref, NULL);
224 
225    assert(path.path[0]->deref_type == nir_deref_type_var);
226    nir_deref_instr **p = &path.path[1];
227 
228    b->cursor = nir_before_instr(&intrin->instr);
229    nir_deref_instr *deref_var = nir_build_deref_var(b, new_var);
230 
231    /* Handle 2D arrays such as Geom shader inputs */
232    if (glsl_type_is_array(glsl_get_array_element(new_var->type))) {
233       assert((*p)->deref_type == nir_deref_type_array);
234       deref_var = nir_build_deref_array(b, deref_var, (*p)->arr.index.ssa);
235       p++;
236    }
237 
238    assert((*p)->deref_type == nir_deref_type_array);
239 
240    /**
241     * Create the necessary values to index into gl_ClipDistanceMESA based
242     * on the value previously used to index into gl_ClipDistance.
243     *
244     * An array index selects one of the vec4's in gl_ClipDistanceMESA
245     * a swizzle then selects a component within the selected vec4.
246     */
247    nir_src old_index = (*p)->arr.index;
248    if (nir_src_is_const(old_index)) {
249       unsigned const_val = nir_src_as_uint(old_index) + state->offset;
250       unsigned swizzle = const_val % 4;
251 
252       nir_deref_instr *def_arr_instr =
253          nir_build_deref_array_imm(b, deref_var, const_val / 4);
254 
255       if (intrin->intrinsic == nir_intrinsic_store_deref) {
256          nir_def *value = intrin->src[1].ssa;
257          nir_build_write_masked_store(b, def_arr_instr, value, swizzle);
258       } else {
259          assert(intrin->intrinsic == nir_intrinsic_load_deref ||
260                 intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
261                 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
262                 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset);
263 
264          nir_def *load_def;
265          if (intrin->intrinsic == nir_intrinsic_load_deref)
266             load_def = nir_load_deref(b, def_arr_instr);
267          else
268             load_def = interp_deref(b, intrin, def_arr_instr);
269 
270          nir_def *swz = nir_channel(b, load_def, swizzle);
271          nir_def_rewrite_uses(&intrin->def, swz);
272       }
273    } else {
274       nir_def *index = nir_iadd_imm(b, old_index.ssa, state->offset);
275       nir_def *swizzle = nir_umod_imm(b, index, 4);
276       index = nir_ishr_imm(b, index, 2); /* index / 4 */
277 
278       nir_deref_instr *def_arr_instr =
279          nir_build_deref_array(b, deref_var, index);
280 
281       if (intrin->intrinsic == nir_intrinsic_store_deref) {
282          nir_def *value = intrin->src[1].ssa;
283          nir_build_write_masked_stores(b, def_arr_instr, value, swizzle, 0, 4);
284       } else {
285          assert(intrin->intrinsic == nir_intrinsic_load_deref ||
286                 intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
287                 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
288                 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset);
289 
290          nir_def *load_def;
291          if (intrin->intrinsic == nir_intrinsic_load_deref)
292             load_def = nir_load_deref(b, def_arr_instr);
293          else
294             load_def = interp_deref(b, intrin, def_arr_instr);
295 
296          nir_def *swz = nir_vector_extract(b, load_def, swizzle);
297          nir_def_rewrite_uses(&intrin->def, swz);
298       }
299    }
300 
301    nir_deref_path_finish(&path);
302 }
303 
304 static bool
replace_with_derefs_to_vec4(nir_builder * b,nir_intrinsic_instr * intr,void * cb_data)305 replace_with_derefs_to_vec4(nir_builder *b, nir_intrinsic_instr *intr,
306                             void *cb_data)
307 {
308    struct lower_distance_state *state =
309       (struct lower_distance_state *) cb_data;
310    nir_variable_mode mask = nir_var_shader_in | nir_var_shader_out;
311 
312    /* Copy deref lowering is expected to happen before we get here */
313    assert(intr->intrinsic != nir_intrinsic_copy_deref);
314    assert(intr->intrinsic != nir_intrinsic_interp_deref_at_vertex);
315 
316    if (intr->intrinsic != nir_intrinsic_load_deref &&
317        intr->intrinsic != nir_intrinsic_store_deref &&
318        intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
319        intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
320        intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
321       return false;
322 
323    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
324    if (!nir_deref_mode_is_one_of(deref, mask))
325       return false;
326 
327    nir_variable *var = nir_deref_instr_get_variable(deref);
328 
329    /* The var has already been lowered to a temp so the derefs have already
330     * been replaced. We can end up here when a shader has both clip and cull
331     * arrays.
332     */
333    if (var->data.mode != nir_var_shader_in &&
334        var->data.mode != nir_var_shader_out)
335       return false;
336 
337    if (var->data.mode == nir_var_shader_out &&
338       var != state->old_distance_out_var)
339       return false;
340 
341    if (var->data.mode == nir_var_shader_in &&
342        var != state->old_distance_in_var)
343       return false;
344 
345    nir_variable *new_var = var->data.mode == nir_var_shader_in ?
346       state->new_distance_in_var : state->new_distance_out_var;
347 
348    lower_distance_deref(state, b, intr, deref, new_var);
349 
350    return true;
351 }
352 
353 static void
lower_distance_to_vec4(nir_shader * shader,struct lower_distance_state * state)354 lower_distance_to_vec4(nir_shader *shader, struct lower_distance_state *state)
355 {
356    /* Replace declarations */
357    nir_foreach_variable_with_modes_safe(var, shader,
358                                         nir_var_shader_in | nir_var_shader_out) {
359       replace_var_declaration(state, shader, var, state->in_name);
360    }
361 
362    if (!state->old_distance_in_var && !state->old_distance_out_var)
363       return;
364 
365    /* Replace derefs, we may have indirect store lowering which will change
366     * control flow of the shader.
367     */
368    nir_shader_intrinsics_pass(shader, replace_with_derefs_to_vec4,
369                               nir_metadata_none, state);
370 
371    /* Mark now lowered vars as ordinary globals to be dead code eliminated.
372     * Also clear the compact flag to avoid issues with validation.
373     */
374    if (state->old_distance_out_var) {
375       state->old_distance_out_var->data.mode = nir_var_shader_temp;
376       state->old_distance_out_var->data.compact = false;
377    }
378 
379    if (state->old_distance_in_var) {
380       state->old_distance_in_var->data.mode = nir_var_shader_temp;
381       state->old_distance_in_var->data.compact = false;
382    }
383 }
384 
385 bool
nir_lower_clip_cull_distance_to_vec4s(nir_shader * shader)386 nir_lower_clip_cull_distance_to_vec4s(nir_shader *shader)
387 {
388    int clip_size = 0;
389    int cull_size = 0;
390 
391    nir_variable_mode mode = nir_var_shader_in | nir_var_shader_out;
392    nir_foreach_variable_with_modes(var, shader, mode) {
393       if ((var->data.mode == nir_var_shader_in &&
394            shader->info.stage == MESA_SHADER_VERTEX) ||
395           (var->data.mode == nir_var_shader_out &&
396            shader->info.stage == MESA_SHADER_FRAGMENT) ||
397           shader->info.stage == MESA_SHADER_COMPUTE)
398          continue;
399 
400 
401       if (var->data.location == VARYING_SLOT_CLIP_DIST0)
402          clip_size = MAX2(clip_size, get_unwrapped_array_length(shader, var));
403 
404       if (var->data.location == VARYING_SLOT_CULL_DIST0)
405          cull_size = MAX2(cull_size, get_unwrapped_array_length(shader, var));
406    }
407 
408    if (clip_size == 0 && cull_size == 0) {
409       nir_shader_preserve_all_metadata(shader);
410       return false;
411    }
412 
413    struct lower_distance_state state;
414    state.old_distance_out_var = NULL;
415    state.old_distance_in_var = NULL;
416    state.new_distance_out_var = NULL;
417    state.new_distance_in_var = NULL;
418    state.shader_stage = shader->info.stage;
419    state.in_name = "gl_ClipDistance";
420    state.total_size = clip_size + cull_size;
421    state.offset = 0;
422    lower_distance_to_vec4(shader, &state);
423 
424    state.old_distance_out_var = NULL;
425    state.old_distance_in_var = NULL;
426    state.in_name ="gl_CullDistance";
427    state.offset = clip_size;
428    lower_distance_to_vec4(shader, &state);
429 
430    nir_fixup_deref_modes(shader);
431 
432    /* Assume we made progress */
433    return true;
434 }
435 
436 static bool
combine_clip_cull(nir_shader * nir,nir_variable_mode mode,bool store_info)437 combine_clip_cull(nir_shader *nir,
438                   nir_variable_mode mode,
439                   bool store_info)
440 {
441    nir_variable *cull = NULL;
442    nir_variable *clip = NULL;
443 
444    nir_foreach_variable_with_modes(var, nir, mode) {
445       if (var->data.location == VARYING_SLOT_CLIP_DIST0)
446          clip = var;
447 
448       if (var->data.location == VARYING_SLOT_CULL_DIST0)
449          cull = var;
450    }
451 
452    if (!cull && !clip) {
453       /* If this is run after optimizations and the variables have been
454        * eliminated, we should update the shader info, because no other
455        * place does that.
456        */
457       if (store_info) {
458          nir->info.clip_distance_array_size = 0;
459          nir->info.cull_distance_array_size = 0;
460       }
461       return false;
462    }
463 
464    if (!cull && clip) {
465       /* The GLSL IR lowering pass must have converted these to vectors */
466       if (!clip->data.compact)
467          return false;
468 
469       /* If this pass has already run, don't repeat.  We would think that
470        * the combined clip/cull distance array was clip-only and mess up.
471        */
472       if (clip->data.how_declared == nir_var_hidden)
473          return false;
474    }
475 
476    const unsigned clip_array_size = get_unwrapped_array_length(nir, clip);
477    const unsigned cull_array_size = get_unwrapped_array_length(nir, cull);
478 
479    if (store_info) {
480       nir->info.clip_distance_array_size = clip_array_size;
481       nir->info.cull_distance_array_size = cull_array_size;
482    }
483 
484    if (clip) {
485       assert(clip->data.compact);
486       clip->data.how_declared = nir_var_hidden;
487    }
488 
489    if (cull) {
490       assert(cull->data.compact);
491       cull->data.how_declared = nir_var_hidden;
492       cull->data.location = VARYING_SLOT_CLIP_DIST0 + clip_array_size / 4;
493       cull->data.location_frac = clip_array_size % 4;
494    }
495 
496    return true;
497 }
498 
499 bool
nir_lower_clip_cull_distance_arrays(nir_shader * nir)500 nir_lower_clip_cull_distance_arrays(nir_shader *nir)
501 {
502    bool progress = false;
503 
504    if (nir->info.stage <= MESA_SHADER_GEOMETRY ||
505        nir->info.stage == MESA_SHADER_MESH)
506       progress |= combine_clip_cull(nir, nir_var_shader_out, true);
507 
508    if (nir->info.stage > MESA_SHADER_VERTEX &&
509        nir->info.stage <= MESA_SHADER_FRAGMENT) {
510       progress |= combine_clip_cull(nir, nir_var_shader_in,
511                                     nir->info.stage == MESA_SHADER_FRAGMENT);
512    }
513 
514    nir_foreach_function_impl(impl, nir) {
515       if (progress) {
516          nir_metadata_preserve(impl,
517                                nir_metadata_control_flow |
518                                nir_metadata_live_defs |
519                                nir_metadata_loop_analysis);
520       } else {
521          nir_metadata_preserve(impl, nir_metadata_all);
522       }
523    }
524 
525    return progress;
526 }
527