1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27
28 /**
29 * This file contains two different lowering passes.
30 *
31 * 1. nir_lower_clip_cull_distance_arrays()
32 *
33 * This pass combines clip and cull distance arrays in separate locations
34 * and colocates them both in VARYING_SLOT_CLIP_DIST0. It does so by
35 * maintaining two arrays but making them compact and using location_frac
36 * to stack them on top of each other.
37 *
38 * 2. nir_lower_clip_cull_distance_to_vec4s()
39 *
40 * This pass accounts for the difference between the way
41 * gl_ClipDistance is declared in standard GLSL (as an array of
42 * floats), and the way it is frequently implemented in hardware (as
43 * a pair of vec4s, with four clip distances packed into each).
44 *
45 * The declaration of gl_ClipDistance is replaced with a declaration
46 * of gl_ClipDistanceMESA, and any references to gl_ClipDistance are
47 * translated to refer to gl_ClipDistanceMESA with the appropriate
48 * swizzling of array indices. For instance:
49 *
50 * gl_ClipDistance[i]
51 *
52 * is translated into:
53 *
54 * gl_ClipDistanceMESA[i>>2][i&3]
55 */
56
57 #define GLSL_CLIP_VAR_NAME "gl_ClipDistanceMESA"
58
59 struct lower_distance_state {
60 /**
61 * Pointer to the declaration of gl_ClipDistance, if found.
62 *
63 * Note:
64 *
65 * - the in_var is for geometry and both tessellation shader inputs only.
66 *
67 * - since gl_ClipDistance is available in tessellation control,
68 * tessellation evaluation and geometry shaders as both an input
69 * and an output, it's possible for both old_distance_out_var
70 * and old_distance_in_var to be non-null.
71 */
72 nir_variable *old_distance_out_var;
73 nir_variable *old_distance_in_var;
74
75 /**
76 * Pointer to the newly-created gl_ClipDistanceMESA variable.
77 */
78 nir_variable *new_distance_out_var;
79 nir_variable *new_distance_in_var;
80
81 /**
82 * Type of shader we are compiling (e.g. MESA_SHADER_VERTEX)
83 */
84 gl_shader_stage shader_stage;
85 const char *in_name;
86 int total_size;
87 int offset;
88 };
89
90 /**
91 * Get the length of the clip/cull distance array, looking past
92 * any interface block arrays.
93 */
94 static unsigned
get_unwrapped_array_length(nir_shader * nir,nir_variable * var)95 get_unwrapped_array_length(nir_shader *nir, nir_variable *var)
96 {
97 if (!var)
98 return 0;
99
100 /* Unwrap GS input and TCS input/output interfaces. We want the
101 * underlying clip/cull distance array length, not the per-vertex
102 * array length.
103 */
104 const struct glsl_type *type = var->type;
105 if (nir_is_arrayed_io(var, nir->info.stage))
106 type = glsl_get_array_element(type);
107
108 assert(glsl_type_is_array(type));
109
110 return glsl_get_length(type);
111 }
112
113 /**
114 * Replace any declaration of 'in_name' as an array of floats with a
115 * declaration of gl_ClipDistanceMESA as an array of vec4's.
116 */
117 static void
replace_var_declaration(struct lower_distance_state * state,nir_shader * sh,nir_variable * var,const char * in_name)118 replace_var_declaration(struct lower_distance_state *state, nir_shader *sh,
119 nir_variable *var, const char *in_name)
120 {
121 nir_variable **old_var;
122 nir_variable **new_var;
123
124 if (!var->name || strcmp(var->name, in_name) != 0)
125 return;
126
127 assert(glsl_type_is_array(var->type));
128 if (var->data.mode == nir_var_shader_out) {
129 if (state->old_distance_out_var)
130 return;
131
132 old_var = &state->old_distance_out_var;
133 new_var = &state->new_distance_out_var;
134 } else if (var->data.mode == nir_var_shader_in) {
135 if (state->old_distance_in_var)
136 return;
137
138 old_var = &state->old_distance_in_var;
139 new_var = &state->new_distance_in_var;
140 } else {
141 unreachable("not reached");
142 }
143
144 *old_var = var;
145
146 if (!(*new_var)) {
147 unsigned new_size = (state->total_size + 3) / 4;
148
149 *new_var = rzalloc(sh, nir_variable);
150 (*new_var)->name = ralloc_strdup(*new_var, GLSL_CLIP_VAR_NAME);
151 (*new_var)->data.mode = var->data.mode;
152 (*new_var)->data.location = VARYING_SLOT_CLIP_DIST0;
153 (*new_var)->data.assigned = true;
154 (*new_var)->data.how_declared = var->data.how_declared;
155
156 nir_shader_add_variable(sh, *new_var);
157
158 if (!glsl_type_is_array(glsl_get_array_element(var->type))) {
159 /* gl_ClipDistance (used for vertex, tessellation evaluation and
160 * geometry output, and fragment input).
161 */
162 assert((var->data.mode == nir_var_shader_in &&
163 sh->info.stage == MESA_SHADER_FRAGMENT) ||
164 (var->data.mode == nir_var_shader_out &&
165 (sh->info.stage == MESA_SHADER_VERTEX ||
166 sh->info.stage == MESA_SHADER_TESS_EVAL ||
167 sh->info.stage == MESA_SHADER_GEOMETRY)));
168
169 assert(glsl_get_base_type(glsl_get_array_element(var->type)) ==
170 GLSL_TYPE_FLOAT);
171
172 /* And change the properties that we need to change */
173 (*new_var)->type = glsl_array_type(glsl_vec4_type(), new_size, 0);
174 } else {
175 /* 2D gl_ClipDistance (used for tessellation control, tessellation
176 * evaluation and geometry input, and tessellation control output).
177 */
178 assert((var->data.mode == nir_var_shader_in &&
179 (sh->info.stage == MESA_SHADER_GEOMETRY ||
180 sh->info.stage == MESA_SHADER_TESS_EVAL)) ||
181 sh->info.stage == MESA_SHADER_TESS_CTRL);
182
183 assert (glsl_get_base_type(glsl_get_array_element(glsl_get_array_element(var->type))) ==
184 GLSL_TYPE_FLOAT);
185
186 /* And change the properties that we need to change */
187 (*new_var)->type =
188 glsl_array_type(glsl_array_type(glsl_vec4_type(), new_size, 0),
189 glsl_array_size(var->type), 0);
190 }
191 }
192 }
193
194 static nir_def *
interp_deref(nir_builder * b,nir_intrinsic_instr * old_intrin,nir_deref_instr * deref)195 interp_deref(nir_builder *b, nir_intrinsic_instr *old_intrin,
196 nir_deref_instr *deref)
197 {
198 nir_intrinsic_instr *intrin =
199 nir_intrinsic_instr_create(b->shader, old_intrin->intrinsic);
200 intrin->num_components = 4;
201 intrin->src[0] = nir_src_for_ssa(&deref->def);
202
203 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
204 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample)
205 intrin->src[1] = nir_src_for_ssa(old_intrin->src[1].ssa);
206
207 nir_def_init(&intrin->instr, &intrin->def, 4, 32);
208 nir_builder_instr_insert(b, &intrin->instr);
209
210 return &intrin->def;
211 }
212
213 /* Replace any expression that indexes one of the floats in gl_ClipDistance
214 * with an expression that indexes into one of the vec4's in
215 * gl_ClipDistanceMESA and accesses the appropriate component.
216 */
217 static void
lower_distance_deref(struct lower_distance_state * state,nir_builder * b,nir_intrinsic_instr * intrin,nir_deref_instr * deref,nir_variable * new_var)218 lower_distance_deref(struct lower_distance_state *state, nir_builder *b,
219 nir_intrinsic_instr *intrin, nir_deref_instr *deref,
220 nir_variable *new_var)
221 {
222 nir_deref_path path;
223 nir_deref_path_init(&path, deref, NULL);
224
225 assert(path.path[0]->deref_type == nir_deref_type_var);
226 nir_deref_instr **p = &path.path[1];
227
228 b->cursor = nir_before_instr(&intrin->instr);
229 nir_deref_instr *deref_var = nir_build_deref_var(b, new_var);
230
231 /* Handle 2D arrays such as Geom shader inputs */
232 if (glsl_type_is_array(glsl_get_array_element(new_var->type))) {
233 assert((*p)->deref_type == nir_deref_type_array);
234 deref_var = nir_build_deref_array(b, deref_var, (*p)->arr.index.ssa);
235 p++;
236 }
237
238 assert((*p)->deref_type == nir_deref_type_array);
239
240 /**
241 * Create the necessary values to index into gl_ClipDistanceMESA based
242 * on the value previously used to index into gl_ClipDistance.
243 *
244 * An array index selects one of the vec4's in gl_ClipDistanceMESA
245 * a swizzle then selects a component within the selected vec4.
246 */
247 nir_src old_index = (*p)->arr.index;
248 if (nir_src_is_const(old_index)) {
249 unsigned const_val = nir_src_as_uint(old_index) + state->offset;
250 unsigned swizzle = const_val % 4;
251
252 nir_deref_instr *def_arr_instr =
253 nir_build_deref_array_imm(b, deref_var, const_val / 4);
254
255 if (intrin->intrinsic == nir_intrinsic_store_deref) {
256 nir_def *value = intrin->src[1].ssa;
257 nir_build_write_masked_store(b, def_arr_instr, value, swizzle);
258 } else {
259 assert(intrin->intrinsic == nir_intrinsic_load_deref ||
260 intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
261 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
262 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset);
263
264 nir_def *load_def;
265 if (intrin->intrinsic == nir_intrinsic_load_deref)
266 load_def = nir_load_deref(b, def_arr_instr);
267 else
268 load_def = interp_deref(b, intrin, def_arr_instr);
269
270 nir_def *swz = nir_channel(b, load_def, swizzle);
271 nir_def_rewrite_uses(&intrin->def, swz);
272 }
273 } else {
274 nir_def *index = nir_iadd_imm(b, old_index.ssa, state->offset);
275 nir_def *swizzle = nir_umod_imm(b, index, 4);
276 index = nir_ishr_imm(b, index, 2); /* index / 4 */
277
278 nir_deref_instr *def_arr_instr =
279 nir_build_deref_array(b, deref_var, index);
280
281 if (intrin->intrinsic == nir_intrinsic_store_deref) {
282 nir_def *value = intrin->src[1].ssa;
283 nir_build_write_masked_stores(b, def_arr_instr, value, swizzle, 0, 4);
284 } else {
285 assert(intrin->intrinsic == nir_intrinsic_load_deref ||
286 intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
287 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
288 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset);
289
290 nir_def *load_def;
291 if (intrin->intrinsic == nir_intrinsic_load_deref)
292 load_def = nir_load_deref(b, def_arr_instr);
293 else
294 load_def = interp_deref(b, intrin, def_arr_instr);
295
296 nir_def *swz = nir_vector_extract(b, load_def, swizzle);
297 nir_def_rewrite_uses(&intrin->def, swz);
298 }
299 }
300
301 nir_deref_path_finish(&path);
302 }
303
304 static bool
replace_with_derefs_to_vec4(nir_builder * b,nir_intrinsic_instr * intr,void * cb_data)305 replace_with_derefs_to_vec4(nir_builder *b, nir_intrinsic_instr *intr,
306 void *cb_data)
307 {
308 struct lower_distance_state *state =
309 (struct lower_distance_state *) cb_data;
310 nir_variable_mode mask = nir_var_shader_in | nir_var_shader_out;
311
312 /* Copy deref lowering is expected to happen before we get here */
313 assert(intr->intrinsic != nir_intrinsic_copy_deref);
314 assert(intr->intrinsic != nir_intrinsic_interp_deref_at_vertex);
315
316 if (intr->intrinsic != nir_intrinsic_load_deref &&
317 intr->intrinsic != nir_intrinsic_store_deref &&
318 intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
319 intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
320 intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
321 return false;
322
323 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
324 if (!nir_deref_mode_is_one_of(deref, mask))
325 return false;
326
327 nir_variable *var = nir_deref_instr_get_variable(deref);
328
329 /* The var has already been lowered to a temp so the derefs have already
330 * been replaced. We can end up here when a shader has both clip and cull
331 * arrays.
332 */
333 if (var->data.mode != nir_var_shader_in &&
334 var->data.mode != nir_var_shader_out)
335 return false;
336
337 if (var->data.mode == nir_var_shader_out &&
338 var != state->old_distance_out_var)
339 return false;
340
341 if (var->data.mode == nir_var_shader_in &&
342 var != state->old_distance_in_var)
343 return false;
344
345 nir_variable *new_var = var->data.mode == nir_var_shader_in ?
346 state->new_distance_in_var : state->new_distance_out_var;
347
348 lower_distance_deref(state, b, intr, deref, new_var);
349
350 return true;
351 }
352
353 static void
lower_distance_to_vec4(nir_shader * shader,struct lower_distance_state * state)354 lower_distance_to_vec4(nir_shader *shader, struct lower_distance_state *state)
355 {
356 /* Replace declarations */
357 nir_foreach_variable_with_modes_safe(var, shader,
358 nir_var_shader_in | nir_var_shader_out) {
359 replace_var_declaration(state, shader, var, state->in_name);
360 }
361
362 if (!state->old_distance_in_var && !state->old_distance_out_var)
363 return;
364
365 /* Replace derefs, we may have indirect store lowering which will change
366 * control flow of the shader.
367 */
368 nir_shader_intrinsics_pass(shader, replace_with_derefs_to_vec4,
369 nir_metadata_none, state);
370
371 /* Mark now lowered vars as ordinary globals to be dead code eliminated.
372 * Also clear the compact flag to avoid issues with validation.
373 */
374 if (state->old_distance_out_var) {
375 state->old_distance_out_var->data.mode = nir_var_shader_temp;
376 state->old_distance_out_var->data.compact = false;
377 }
378
379 if (state->old_distance_in_var) {
380 state->old_distance_in_var->data.mode = nir_var_shader_temp;
381 state->old_distance_in_var->data.compact = false;
382 }
383 }
384
385 bool
nir_lower_clip_cull_distance_to_vec4s(nir_shader * shader)386 nir_lower_clip_cull_distance_to_vec4s(nir_shader *shader)
387 {
388 int clip_size = 0;
389 int cull_size = 0;
390
391 nir_variable_mode mode = nir_var_shader_in | nir_var_shader_out;
392 nir_foreach_variable_with_modes(var, shader, mode) {
393 if ((var->data.mode == nir_var_shader_in &&
394 shader->info.stage == MESA_SHADER_VERTEX) ||
395 (var->data.mode == nir_var_shader_out &&
396 shader->info.stage == MESA_SHADER_FRAGMENT) ||
397 shader->info.stage == MESA_SHADER_COMPUTE)
398 continue;
399
400
401 if (var->data.location == VARYING_SLOT_CLIP_DIST0)
402 clip_size = MAX2(clip_size, get_unwrapped_array_length(shader, var));
403
404 if (var->data.location == VARYING_SLOT_CULL_DIST0)
405 cull_size = MAX2(cull_size, get_unwrapped_array_length(shader, var));
406 }
407
408 if (clip_size == 0 && cull_size == 0) {
409 nir_shader_preserve_all_metadata(shader);
410 return false;
411 }
412
413 struct lower_distance_state state;
414 state.old_distance_out_var = NULL;
415 state.old_distance_in_var = NULL;
416 state.new_distance_out_var = NULL;
417 state.new_distance_in_var = NULL;
418 state.shader_stage = shader->info.stage;
419 state.in_name = "gl_ClipDistance";
420 state.total_size = clip_size + cull_size;
421 state.offset = 0;
422 lower_distance_to_vec4(shader, &state);
423
424 state.old_distance_out_var = NULL;
425 state.old_distance_in_var = NULL;
426 state.in_name ="gl_CullDistance";
427 state.offset = clip_size;
428 lower_distance_to_vec4(shader, &state);
429
430 nir_fixup_deref_modes(shader);
431
432 /* Assume we made progress */
433 return true;
434 }
435
436 static bool
combine_clip_cull(nir_shader * nir,nir_variable_mode mode,bool store_info)437 combine_clip_cull(nir_shader *nir,
438 nir_variable_mode mode,
439 bool store_info)
440 {
441 nir_variable *cull = NULL;
442 nir_variable *clip = NULL;
443
444 nir_foreach_variable_with_modes(var, nir, mode) {
445 if (var->data.location == VARYING_SLOT_CLIP_DIST0)
446 clip = var;
447
448 if (var->data.location == VARYING_SLOT_CULL_DIST0)
449 cull = var;
450 }
451
452 if (!cull && !clip) {
453 /* If this is run after optimizations and the variables have been
454 * eliminated, we should update the shader info, because no other
455 * place does that.
456 */
457 if (store_info) {
458 nir->info.clip_distance_array_size = 0;
459 nir->info.cull_distance_array_size = 0;
460 }
461 return false;
462 }
463
464 if (!cull && clip) {
465 /* The GLSL IR lowering pass must have converted these to vectors */
466 if (!clip->data.compact)
467 return false;
468
469 /* If this pass has already run, don't repeat. We would think that
470 * the combined clip/cull distance array was clip-only and mess up.
471 */
472 if (clip->data.how_declared == nir_var_hidden)
473 return false;
474 }
475
476 const unsigned clip_array_size = get_unwrapped_array_length(nir, clip);
477 const unsigned cull_array_size = get_unwrapped_array_length(nir, cull);
478
479 if (store_info) {
480 nir->info.clip_distance_array_size = clip_array_size;
481 nir->info.cull_distance_array_size = cull_array_size;
482 }
483
484 if (clip) {
485 assert(clip->data.compact);
486 clip->data.how_declared = nir_var_hidden;
487 }
488
489 if (cull) {
490 assert(cull->data.compact);
491 cull->data.how_declared = nir_var_hidden;
492 cull->data.location = VARYING_SLOT_CLIP_DIST0 + clip_array_size / 4;
493 cull->data.location_frac = clip_array_size % 4;
494 }
495
496 return true;
497 }
498
499 bool
nir_lower_clip_cull_distance_arrays(nir_shader * nir)500 nir_lower_clip_cull_distance_arrays(nir_shader *nir)
501 {
502 bool progress = false;
503
504 if (nir->info.stage <= MESA_SHADER_GEOMETRY ||
505 nir->info.stage == MESA_SHADER_MESH)
506 progress |= combine_clip_cull(nir, nir_var_shader_out, true);
507
508 if (nir->info.stage > MESA_SHADER_VERTEX &&
509 nir->info.stage <= MESA_SHADER_FRAGMENT) {
510 progress |= combine_clip_cull(nir, nir_var_shader_in,
511 nir->info.stage == MESA_SHADER_FRAGMENT);
512 }
513
514 nir_foreach_function_impl(impl, nir) {
515 if (progress) {
516 nir_metadata_preserve(impl,
517 nir_metadata_control_flow |
518 nir_metadata_live_defs |
519 nir_metadata_loop_analysis);
520 } else {
521 nir_metadata_preserve(impl, nir_metadata_all);
522 }
523 }
524
525 return progress;
526 }
527