/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts loads and stores of input/output variables
 * into the corresponding input/output intrinsics (load_input, store_output,
 * load_uniform, etc.).
 */

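/*
 * A minimal usage sketch (not part of this pass; the helper and local names
 * below are illustrative): a driver typically assigns driver_locations and
 * then runs the lowering once per stage, passing a callback that returns the
 * size of a GLSL type in whatever units the backend counts locations in, e.g.
 *
 *    static int
 *    my_type_size(const struct glsl_type *type)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    unsigned num_input_slots;
 *    nir_assign_var_locations(&shader->inputs, &num_input_slots,
 *                             my_type_size);
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 my_type_size, (nir_lower_io_options)0);
 *
 * glsl_count_attribute_slots() is assumed to be available from nir_types.h;
 * any driver-specific size metric works equally well.
 */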
#include "nir.h"
#include "nir_builder.h"

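/* Per-pass state threaded through the helpers below: the builder used to
 * emit replacement instructions, the driver's type_size callback, the set of
 * variable modes to lower, and the option flags passed to nir_lower_io().
 */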
struct lower_io_state {
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

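/**
 * Walk the given variable list and assign each variable a driver_location,
 * packing them back to back using the size reported by type_size().  On
 * return, *size is the total number of units consumed.  Interface-backed
 * UBO/SSBO variables are skipped since they live in their own address
 * spaces.
 */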
void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                         int (*type_size)(const struct glsl_type *))
{
   unsigned location = 0;

   nir_foreach_variable(var, var_list) {
      /*
       * UBOs and SSBOs have their own address spaces, so don't count them
       * towards the number of global uniforms.
       */
      if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) &&
          var->interface_type != NULL)
         continue;

      var->data.driver_location = location;
      location += type_size(var->type);
   }

   *size = location;
}

/**
 * Return true if the given variable is a per-vertex input/output array
 * (such as a geometry shader input).
 */
bool
nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (var->data.mode == nir_var_shader_in)
      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL;

   return false;
}

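/* Walk the deref chain for an I/O variable and build an SSA offset for the
 * intrinsic that will replace it.  If vertex_index is non-NULL, the
 * outermost array index is returned there instead of being folded into the
 * offset (for per-vertex arrays), and *component is updated for "compact"
 * variables that pack scalars into vec4 slots.
 */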
static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_var *deref,
              nir_ssa_def **vertex_index,
              int (*type_size)(const struct glsl_type *),
              unsigned *component)
{
   nir_deref *tail = &deref->deref;

   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
    * outermost array index separate.  Process the rest normally.
    */
   if (vertex_index != NULL) {
      tail = tail->child;
      nir_deref_array *deref_array = nir_deref_as_array(tail);

      nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
         vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
      }
      *vertex_index = vtx;
   }

   if (deref->var->data.compact) {
      assert(tail->child->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
      /* We always lower indirect dereferences for "compact" array vars. */
      assert(deref_array->deref_array_type == nir_deref_array_type_direct);

      const unsigned total_offset = *component + deref_array->base_offset;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (tail->child != NULL) {
      const struct glsl_type *parent_type = tail->type;
      tail = tail->child;

      if (tail->deref_type == nir_deref_type_array) {
         nir_deref_array *deref_array = nir_deref_as_array(tail);
         unsigned size = type_size(tail->type);

         offset = nir_iadd(b, offset,
                           nir_imm_int(b, size * deref_array->base_offset));

         if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
            nir_ssa_def *mul =
               nir_imul(b, nir_imm_int(b, size),
                        nir_ssa_for_src(b, deref_array->indirect, 1));

            offset = nir_iadd(b, offset, mul);
         }
      } else if (tail->deref_type == nir_deref_type_struct) {
         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < deref_struct->index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent_type, i));
         }
         offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
      }
   }

   return offset;
}

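/* Build the load intrinsic that replaces a load_var on an input, output,
 * uniform, or shared variable.  Fragment-shader inputs may become
 * load_interpolated_input with an explicit barycentric source when the
 * backend opts in via use_interpolated_input_intrinsics.
 */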
static nir_intrinsic_instr *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *vertex_index, nir_ssa_def *offset,
           unsigned component)
{
   const nir_shader *nir = state->builder.shader;
   nir_variable *var = intrin->variables[0]->var;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT) {
         assert(vertex_index == NULL);

         nir_intrinsic_op bary_op;
         if (var->data.sample ||
             (state->options & nir_lower_io_force_sample_interpolation))
            bary_op = nir_intrinsic_load_barycentric_sample;
         else if (var->data.centroid)
            bary_op = nir_intrinsic_load_barycentric_centroid;
         else
            bary_op = nir_intrinsic_load_barycentric_pixel;

         barycentric = nir_load_barycentric(&state->builder, bary_op,
                                            var->data.interpolation);
         op = nir_intrinsic_load_interpolated_input;
      } else {
         op = vertex_index ? nir_intrinsic_load_per_vertex_input :
                             nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = vertex_index ? nir_intrinsic_load_per_vertex_output :
                          nir_intrinsic_load_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   case nir_var_shared:
      op = nir_intrinsic_load_shared;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load, state->type_size(var->type));

   if (vertex_index) {
      load->src[0] = nir_src_for_ssa(vertex_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   return load;
}

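/* Build the store intrinsic that replaces a store_var on an output or shared
 * variable, copying over the value source and write mask.
 */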
static nir_intrinsic_instr *
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *vertex_index, nir_ssa_def *offset,
            unsigned component)
{
   nir_variable *var = intrin->variables[0]->var;
   nir_variable_mode mode = var->data.mode;

   nir_intrinsic_op op;
   if (mode == nir_var_shared) {
      op = nir_intrinsic_store_shared;
   } else {
      assert(mode == nir_var_shader_out);
      op = vertex_index ? nir_intrinsic_store_per_vertex_output :
                          nir_intrinsic_store_output;
   }

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = intrin->num_components;

   nir_src_copy(&store->src[0], &intrin->src[0], store);

   nir_intrinsic_set_base(store, var->data.driver_location);

   if (mode == nir_var_shader_out)
      nir_intrinsic_set_component(store, component);

   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));

   if (vertex_index)
      store->src[1] = nir_src_for_ssa(vertex_index);

   store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);

   return store;
}

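/* Map a var_atomic_* intrinsic on a shared variable to the corresponding
 * shared_atomic_* intrinsic, with the computed offset as the first source.
 */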
static nir_intrinsic_instr *
lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
             nir_ssa_def *offset)
{
   nir_variable *var = intrin->variables[0]->var;

   assert(var->data.mode == nir_var_shared);

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
#define OP(O) case nir_intrinsic_var_##O: op = nir_intrinsic_shared_##O; break;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
#undef OP
   default:
      unreachable("Invalid atomic");
   }

   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(state->builder.shader, op);

   nir_intrinsic_set_base(atomic, var->data.driver_location);

   atomic->src[0] = nir_src_for_ssa(offset);
   for (unsigned i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
      nir_src_copy(&atomic->src[i+1], &intrin->src[i], atomic);
   }

   return atomic;
}

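/* Lower an interp_var_at_* intrinsic on a fragment input into an explicit
 * load_barycentric_* setup followed by load_interpolated_input.  Flat inputs
 * fall back to a plain load since interpolation is a no-op for them.
 */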
static nir_intrinsic_instr *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_ssa_def *offset, unsigned component)
{
   nir_variable *var = intrin->variables[0]->var;

   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat. */
   if (var->data.interpolation == INTERP_MODE_FLAT)
      return lower_load(intrin, state, NULL, offset, component);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_var_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_var_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_var_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic != nir_intrinsic_interp_var_at_centroid)
      nir_src_copy(&bary_setup->src[0], &intrin->src[0], bary_setup);

   nir_builder_instr_insert(&state->builder, &bary_setup->instr);

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader,
                                 nir_intrinsic_load_interpolated_input);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   nir_intrinsic_set_component(load, component);

   load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
   load->src[1] = nir_src_for_ssa(offset);

   return load;
}

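/* Lower every variable-based I/O intrinsic in a single block: compute the
 * offset (and vertex index, for per-vertex I/O), build the replacement
 * intrinsic, rewrite uses of the destination, and remove the original
 * instruction.  Returns true if anything changed.
 */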
static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var:
      case nir_intrinsic_store_var:
      case nir_intrinsic_var_atomic_add:
      case nir_intrinsic_var_atomic_imin:
      case nir_intrinsic_var_atomic_umin:
      case nir_intrinsic_var_atomic_imax:
      case nir_intrinsic_var_atomic_umax:
      case nir_intrinsic_var_atomic_and:
      case nir_intrinsic_var_atomic_or:
      case nir_intrinsic_var_atomic_xor:
      case nir_intrinsic_var_atomic_exchange:
      case nir_intrinsic_var_atomic_comp_swap:
         /* We can lower the I/O for this NIR intrinsic */
         break;
      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
         /* We can optionally lower these to load_interpolated_input;
          * otherwise fall through and skip them.
          */
         if (options->use_interpolated_input_intrinsics)
            break;
      default:
         /* We can't lower the I/O for this NIR intrinsic, so skip it */
         continue;
      }

      nir_variable *var = intrin->variables[0]->var;
      nir_variable_mode mode = var->data.mode;

      if ((state->modes & mode) == 0)
         continue;

      if (mode != nir_var_shader_in &&
          mode != nir_var_shader_out &&
          mode != nir_var_shared &&
          mode != nir_var_uniform)
         continue;

      b->cursor = nir_before_instr(instr);

      const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);

      nir_ssa_def *offset;
      nir_ssa_def *vertex_index = NULL;
      unsigned component_offset = var->data.location_frac;

      offset = get_io_offset(b, intrin->variables[0],
                             per_vertex ? &vertex_index : NULL,
                             state->type_size, &component_offset);

      nir_intrinsic_instr *replacement;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var:
         replacement = lower_load(intrin, state, vertex_index, offset,
                                  component_offset);
         break;

      case nir_intrinsic_store_var:
         replacement = lower_store(intrin, state, vertex_index, offset,
                                   component_offset);
         break;

      case nir_intrinsic_var_atomic_add:
      case nir_intrinsic_var_atomic_imin:
      case nir_intrinsic_var_atomic_umin:
      case nir_intrinsic_var_atomic_imax:
      case nir_intrinsic_var_atomic_umax:
      case nir_intrinsic_var_atomic_and:
      case nir_intrinsic_var_atomic_or:
      case nir_intrinsic_var_atomic_xor:
      case nir_intrinsic_var_atomic_exchange:
      case nir_intrinsic_var_atomic_comp_swap:
         assert(vertex_index == NULL);
         replacement = lower_atomic(intrin, state, offset);
         break;

      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
         assert(vertex_index == NULL);
         replacement = lower_interpolate_at(intrin, state, offset,
                                            component_offset);
         break;

      default:
         continue;
      }

      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         if (intrin->dest.is_ssa) {
            nir_ssa_dest_init(&replacement->instr, &replacement->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&replacement->dest.ssa));
         } else {
            nir_dest_copy(&replacement->dest, &intrin->dest, &intrin->instr);
         }
      }

      nir_instr_insert_before(&intrin->instr, &replacement->instr);
      nir_instr_remove(&intrin->instr);
      progress = true;
   }

   return progress;
}

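/* Run the block-level lowering over every block of one function
 * implementation, preserving the CFG metadata that remains valid.
 */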
static bool
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *),
                  nir_lower_io_options options)
{
   struct lower_io_state state;
   bool progress = false;

   nir_builder_init(&state.builder, impl);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
   return progress;
}

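/**
 * Entry point: lower variable-based I/O for every function in the shader.
 * 'modes' selects which variable modes to lower and 'type_size' reports the
 * size of a GLSL type in the units the driver uses for driver_location.
 */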
bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *),
             nir_lower_io_options options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl) {
         progress |= nir_lower_io_impl(function->impl, modes,
                                       type_size, options);
      }
   }

   return progress;
}

/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_uniform:
      return &instr->src[0];
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_store_output:
      return &instr->src[1];
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[2];
   default:
      return NULL;
   }
}

/**
 * Return the vertex index source for a load/store per_vertex intrinsic.
 */
nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
      return &instr->src[0];
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[1];
   default:
      return NULL;
   }
}