/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_ubo_reference.cpp
 *
 * IR lowering pass to replace dereferences of variables in a uniform
 * buffer object with usage of ir_binop_ubo_load expressions, each of
 * which can read data up to the size of a vec4.
 *
 * This relieves drivers of the responsibility to deal with tricky UBO
 * layout issues like std140 structures and row_major matrices on
 * their own.
 */
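
/* For illustration only (the block and member names below are made up for
 * this example): given GLSL along the lines of
 *
 *    uniform Colors { vec4 tint; };
 *    ...
 *    color = tint;
 *
 * the dereference of `tint` is rewritten into a load from the block's
 * backing buffer, i.e. an ir_binop_ubo_load of the block index and the
 * member's byte offset, reading at most a vec4 per load.
 */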

#include "lower_buffer_access.h"
#include "ir_builder.h"
#include "main/macros.h"
#include "glsl_parser_extras.h"

using namespace ir_builder;

namespace {
class lower_ubo_reference_visitor :
      public lower_buffer_access::lower_buffer_access {
public:
   lower_ubo_reference_visitor(struct gl_linked_shader *shader,
                               bool clamp_block_indices,
                               bool use_std430_as_default)
   : shader(shader), clamp_block_indices(clamp_block_indices),
     struct_field(NULL), variable(NULL)
   {
      this->use_std430_as_default = use_std430_as_default;
   }

   void handle_rvalue(ir_rvalue **rvalue);
   ir_visitor_status visit_enter(ir_assignment *ir);

   void setup_for_load_or_store(void *mem_ctx,
                                ir_variable *var,
                                ir_rvalue *deref,
                                ir_rvalue **offset,
                                unsigned *const_offset,
                                bool *row_major,
                                const glsl_type **matrix_type,
                                enum glsl_interface_packing packing);
   uint32_t ssbo_access_params();
   ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
                           ir_rvalue *offset);
   ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
                      ir_rvalue *offset);

   bool check_for_buffer_array_copy(ir_assignment *ir);
   bool check_for_buffer_struct_copy(ir_assignment *ir);
   void check_for_ssbo_store(ir_assignment *ir);
   void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var,
                        ir_variable *write_var, unsigned write_mask);
   ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
                       unsigned write_mask);

   enum {
      ubo_load_access,
      ssbo_load_access,
      ssbo_store_access,
      ssbo_unsized_array_length_access,
      ssbo_atomic_access,
   } buffer_access_type;

   void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
                             const glsl_type *type, ir_rvalue *offset,
                             unsigned mask, int channel);

   ir_visitor_status visit_enter(class ir_expression *);
   ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
   void check_ssbo_unsized_array_length_expression(class ir_expression *);
   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);

   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
                                                    ir_dereference *,
                                                    ir_variable *);
   ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);

   unsigned calculate_unsized_array_stride(ir_dereference *deref,
                                           enum glsl_interface_packing packing);

   ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
   ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
   ir_visitor_status visit_enter(ir_call *ir);
   ir_visitor_status visit_enter(ir_texture *ir);

   struct gl_linked_shader *shader;
   bool clamp_block_indices;
   const struct glsl_struct_field *struct_field;
   ir_variable *variable;
   ir_rvalue *uniform_block;
   bool progress;
};

/**
 * Determine the name of the interface block field
 *
 * This is the name of the specific member as it would appear in the
 * \c gl_uniform_buffer_variable::Name field in the shader's
 * \c UniformBlocks array.
 */
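/* For example (names made up for illustration): for an instanced block array
 * declared as `uniform Blk { vec4 v; } blk[4];`, a constant access such as
 * blk[2].v is folded into the name as "Blk[2]", while a non-constant access
 * blk[i].v yields "Blk[0]" with the index expression returned through
 * *nonconst_block_index, as built by the loops below.
 */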
static const char *
interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
                     ir_rvalue **nonconst_block_index)
{
   *nonconst_block_index = NULL;
   char *name_copy = NULL;
   size_t base_length = 0;

   /* Loop back through the IR until we find the uniform block */
   ir_rvalue *ir = d;
   while (ir != NULL) {
      switch (ir->ir_type) {
      case ir_type_dereference_variable: {
         /* Exit loop */
         ir = NULL;
         break;
      }

      case ir_type_dereference_record: {
         ir_dereference_record *r = (ir_dereference_record *) ir;
         ir = r->record->as_dereference();

         /* If we got here it means any previous array subscripts belong to
          * block members and not the block itself, so skip over them in the
          * next pass.
          */
         d = ir;
         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) ir;
         ir = a->array->as_dereference();
         break;
      }

      case ir_type_swizzle: {
         ir_swizzle *s = (ir_swizzle *) ir;
         ir = s->val->as_dereference();
         /* Skip swizzle in the next pass */
         d = ir;
         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   while (d != NULL) {
      switch (d->ir_type) {
      case ir_type_dereference_variable: {
         ir_dereference_variable *v = (ir_dereference_variable *) d;
         if (name_copy != NULL &&
             v->var->is_interface_instance() &&
             v->var->type->is_array()) {
            return name_copy;
         } else {
            *nonconst_block_index = NULL;
            return base_name;
         }

         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) d;
         size_t new_length;

         if (name_copy == NULL) {
            name_copy = ralloc_strdup(mem_ctx, base_name);
            base_length = strlen(name_copy);
         }

         /* For arrays of arrays we start at the innermost array and work our
          * way out, so we need to insert the subscript at the base of the
          * name string rather than just attaching it to the end.
          */
         new_length = base_length;
         ir_constant *const_index = a->array_index->as_constant();
         char *end = ralloc_strdup(NULL, &name_copy[new_length]);
         if (!const_index) {
            ir_rvalue *array_index = a->array_index;
            if (array_index->type != glsl_type::uint_type)
               array_index = i2u(array_index);

            if (a->array->type->is_array() &&
                a->array->type->fields.array->is_array()) {
               ir_constant *base_size = new(mem_ctx)
                  ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
               array_index = mul(array_index, base_size);
            }

            if (*nonconst_block_index) {
               *nonconst_block_index = add(*nonconst_block_index, array_index);
            } else {
               *nonconst_block_index = array_index;
            }

            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
                                         end);
         } else {
            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
                                         const_index->get_uint_component(0),
                                         end);
         }
         ralloc_free(end);

         d = a->array->as_dereference();

         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   assert(!"Should not get here.");
   return NULL;
}

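/* Clamp a non-constant block-array index to [0, array_size - 1].  This is
 * only applied when the pass is created with clamp_block_indices set,
 * presumably so that out-of-bounds block indices cannot address arbitrary
 * bindings (e.g. to provide robustness guarantees).
 */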
static ir_rvalue *
clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)
{
   assert(type->is_array());

   const unsigned array_size = type->arrays_of_arrays_size();

   ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
   max_index->type = index->type;

   ir_constant *zero = new(mem_ctx) ir_constant(0);
   zero->type = index->type;

   if (index->type->base_type == GLSL_TYPE_INT)
      index = max2(index, zero);
   index = min2(index, max_index);

   return index;
}

void
lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
                                                     ir_variable *var,
                                                     ir_rvalue *deref,
                                                     ir_rvalue **offset,
                                                     unsigned *const_offset,
                                                     bool *row_major,
                                                     const glsl_type **matrix_type,
                                                     enum glsl_interface_packing packing)
{
   /* Determine the name of the interface block */
   ir_rvalue *nonconst_block_index;
   const char *const field_name =
      interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
                           deref, &nonconst_block_index);

   if (nonconst_block_index && clamp_block_indices) {
      nonconst_block_index =
         clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);
   }

   /* Locate the block by interface name */
   unsigned num_blocks;
   struct gl_uniform_block **blocks;
   if (this->buffer_access_type != ubo_load_access) {
      num_blocks = shader->Program->info.num_ssbos;
      blocks = shader->Program->sh.ShaderStorageBlocks;
   } else {
      num_blocks = shader->Program->info.num_ubos;
      blocks = shader->Program->sh.UniformBlocks;
   }
   this->uniform_block = NULL;
   for (unsigned i = 0; i < num_blocks; i++) {
      if (strcmp(field_name, blocks[i]->Name) == 0) {

         ir_constant *index = new(mem_ctx) ir_constant(i);

         if (nonconst_block_index) {
            this->uniform_block = add(nonconst_block_index, index);
         } else {
            this->uniform_block = index;
         }

         if (var->is_interface_instance()) {
            *const_offset = 0;
         } else {
            *const_offset = blocks[i]->Uniforms[var->data.location].Offset;
         }

         break;
      }
   }

   assert(this->uniform_block);

   this->struct_field = NULL;
   setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major,
                       matrix_type, &this->struct_field, packing);
}

void
lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_dereference *deref = (*rvalue)->as_dereference();
   if (!deref)
      return;

   ir_variable *var = deref->variable_referenced();
   if (!var || !var->is_in_buffer_block())
      return;

   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   const glsl_type *matrix_type;

   enum glsl_interface_packing packing =
      var->get_interface_type()->
         get_internal_ifc_packing(use_std430_as_default);

   this->buffer_access_type =
      var->is_in_shader_storage_block() ?
      ssbo_load_access : ubo_load_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the load.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_type,
                           packing);
   assert(offset);

   /* Now that we've calculated the offset to the start of the
    * dereference, walk over the type and emit loads into a temporary.
    */
   const glsl_type *type = (*rvalue)->type;
   ir_variable *load_var = new(mem_ctx) ir_variable(type,
                                                    "ubo_load_temp",
                                                    ir_var_temporary);
   base_ir->insert_before(load_var);

   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                       "ubo_load_temp_offset",
                                                       ir_var_temporary);
   base_ir->insert_before(load_offset);
   base_ir->insert_before(assign(load_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(load_var);
   emit_access(mem_ctx, false, deref, load_offset, const_offset,
               row_major, matrix_type, packing, 0);
   *rvalue = deref;

   progress = true;
}

ir_expression *
lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
                                      const glsl_type *type,
                                      ir_rvalue *offset)
{
   ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
   return new(mem_ctx)
      ir_expression(ir_binop_ubo_load,
                    type,
                    block_ref,
                    offset);
}

static bool
shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
{
   return state->has_shader_storage_buffer_objects();
}

uint32_t
lower_ubo_reference_visitor::ssbo_access_params()
{
   assert(variable);

   if (variable->is_interface_instance()) {
      assert(struct_field);

      return ((struct_field->memory_coherent ? ACCESS_COHERENT : 0) |
              (struct_field->memory_restrict ? ACCESS_RESTRICT : 0) |
              (struct_field->memory_volatile ? ACCESS_VOLATILE : 0));
   } else {
      return ((variable->data.memory_coherent ? ACCESS_COHERENT : 0) |
              (variable->data.memory_restrict ? ACCESS_RESTRICT : 0) |
              (variable->data.memory_volatile ? ACCESS_VOLATILE : 0));
   }
}

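/* Builds a call to the internal __intrinsic_store_ssbo intrinsic.  The
 * signature created below takes (block_ref, offset, value, write_mask,
 * access), where `access` packs the coherent/restrict/volatile qualifiers
 * returned by ssbo_access_params().
 */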
ir_call *
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
                                        ir_rvalue *deref,
                                        ir_rvalue *offset,
                                        unsigned write_mask)
{
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref", ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset", ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *val_ref = new(mem_ctx)
      ir_variable(deref->type, "value", ir_var_function_in);
   sig_params.push_tail(val_ref);

   ir_variable *writemask_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "write_mask", ir_var_function_in);
   sig_params.push_tail(writemask_ref);

   ir_variable *access_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "access", ir_var_function_in);
   sig_params.push_tail(access_ref);

   ir_function_signature *sig = new(mem_ctx)
      ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->intrinsic_id = ir_intrinsic_ssbo_store;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
   f->add_signature(sig);

   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(deref->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
   return new(mem_ctx) ir_call(sig, NULL, &call_params);
}

ir_call *
lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
                                       const struct glsl_type *type,
                                       ir_rvalue *offset)
{
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref", ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset_ref", ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *access_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "access", ir_var_function_in);
   sig_params.push_tail(access_ref);

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->intrinsic_id = ir_intrinsic_ssbo_load;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
   f->add_signature(sig);

   ir_variable *result = new(mem_ctx)
      ir_variable(type, "ssbo_load_result", ir_var_temporary);
   base_ir->insert_before(result);
   ir_dereference_variable *deref_result = new(mem_ctx)
      ir_dereference_variable(result);

   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));

   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}

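/* Emits the actual load or store for one access, dispatching on the kind of
 * buffer access currently being lowered: UBO loads become ir_binop_ubo_load
 * expressions, while SSBO loads and stores become calls to the intrinsics
 * built above.
 */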
void
lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
                                                  ir_dereference *deref,
                                                  const glsl_type *type,
                                                  ir_rvalue *offset,
                                                  unsigned mask,
                                                  int channel)
{
   switch (this->buffer_access_type) {
   case ubo_load_access:
      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
                                    ubo_load(mem_ctx, type, offset),
                                    mask));
      break;
   case ssbo_load_access: {
      ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
      base_ir->insert_before(load_ssbo);
      ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
      ir_assignment *assignment =
         assign(deref->clone(mem_ctx, NULL), value, mask);
      base_ir->insert_before(assignment);
      break;
   }
   case ssbo_store_access:
      if (channel >= 0) {
         base_ir->insert_after(ssbo_store(mem_ctx,
                                          swizzle(deref, channel, 1),
                                          offset, 1));
      } else {
         base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
      }
      break;
   default:
      unreachable("invalid buffer_access_type in insert_buffer_access");
   }
}

void
lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
                                             ir_dereference *deref,
                                             ir_variable *var,
                                             ir_variable *write_var,
                                             unsigned write_mask)
{
   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   const glsl_type *matrix_type;

   enum glsl_interface_packing packing =
      var->get_interface_type()->
         get_internal_ifc_packing(use_std430_as_default);

   this->buffer_access_type = ssbo_store_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the write.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_type,
                           packing);
   assert(offset);

   /* Now emit writes from the temporary to memory */
   ir_variable *write_offset =
      new(mem_ctx) ir_variable(glsl_type::uint_type,
                               "ssbo_store_temp_offset",
                               ir_var_temporary);

   base_ir->insert_before(write_offset);
   base_ir->insert_before(assign(write_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(write_var);
   emit_access(mem_ctx, true, deref, write_offset, const_offset,
               row_major, matrix_type, packing, write_mask);
}

ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
{
   check_ssbo_unsized_array_length_expression(ir);
   return rvalue_visit(ir);
}

ir_expression *
lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
{
   if (expr->operation !=
       ir_expression_operation(ir_unop_ssbo_unsized_array_length))
      return NULL;

   ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
   if (!rvalue ||
       !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
      return NULL;

   ir_dereference *deref = expr->operands[0]->as_dereference();
   if (!deref)
      return NULL;

   ir_variable *var = expr->operands[0]->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return NULL;
   return process_ssbo_unsized_array_length(&rvalue, deref, var);
}

void
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
{
   if (ir->operation ==
       ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
      /* Don't replace this unop if it is found alone. It is going to be
       * removed by the optimization passes or replaced if it is part of
       * an ir_assignment or another ir_expression.
       */
      return;
   }

   for (unsigned i = 0; i < ir->num_operands; i++) {
      if (ir->operands[i]->ir_type != ir_type_expression)
         continue;
      ir_expression *expr = (ir_expression *) ir->operands[i];
      ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
      if (!temp)
         continue;

      delete expr;
      ir->operands[i] = temp;
   }
}

void
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
{
   if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
      return;

   ir_expression *expr = (ir_expression *) ir->rhs;
   ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
   if (!temp)
      return;

   delete expr;
   ir->rhs = temp;
   return;
}

ir_expression *
lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx)
{
   ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
   return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
                                     glsl_type::int_type,
                                     block_ref);
}

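/* Computes the per-element stride of the unsized array being measured.  For
 * std430 the element's array stride is used directly; for std140 the element
 * size is rounded up to a multiple of 16 bytes, per the std140 rule that
 * array strides are vec4-aligned.
 */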
unsigned
lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
                                                            enum glsl_interface_packing packing)
{
   unsigned array_stride = 0;

   switch (deref->ir_type) {
   case ir_type_dereference_variable:
   {
      ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
      const struct glsl_type *unsized_array_type = NULL;
      /* An unsized array can be sized by other lowering passes, so pick
       * the first field of the array which has the data type of the unsized
       * array.
       */
      unsized_array_type = deref_var->var->type->fields.array;

      /* Whether or not the field is row-major (because it might be a
       * bvec2 or something) does not affect the array itself. We need
       * to know whether an array element in its entirety is row-major.
       */
      const bool array_row_major =
         is_dereferenced_thing_row_major(deref_var);

      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         array_stride = unsized_array_type->std430_array_stride(array_row_major);
      } else {
         array_stride = unsized_array_type->std140_size(array_row_major);
         array_stride = glsl_align(array_stride, 16);
      }
      break;
   }
   case ir_type_dereference_record:
   {
      ir_dereference_record *deref_record = (ir_dereference_record *) deref;
      ir_dereference *interface_deref =
         deref_record->record->as_dereference();
      assert(interface_deref != NULL);
      const struct glsl_type *interface_type = interface_deref->type;
      unsigned record_length = interface_type->length;
      /* An unsized array is always the last element of the interface */
      const struct glsl_type *unsized_array_type =
         interface_type->fields.structure[record_length - 1].type->fields.array;

      const bool array_row_major =
         is_dereferenced_thing_row_major(deref_record);

      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         array_stride = unsized_array_type->std430_array_stride(array_row_major);
      } else {
         array_stride = unsized_array_type->std140_size(array_row_major);
         array_stride = glsl_align(array_stride, 16);
      }
      break;
   }
   default:
      unreachable("Unsupported dereference type");
   }
   return array_stride;
}

ir_expression *
lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
                                                               ir_dereference *deref,
                                                               ir_variable *var)
{
   void *mem_ctx = ralloc_parent(*rvalue);

   ir_rvalue *base_offset = NULL;
   unsigned const_offset;
   bool row_major;
   const glsl_type *matrix_type;

   enum glsl_interface_packing packing =
      var->get_interface_type()->
         get_internal_ifc_packing(use_std430_as_default);
   int unsized_array_stride =
      calculate_unsized_array_stride(deref, packing);

   this->buffer_access_type = ssbo_unsized_array_length_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to calculate the length.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &base_offset, &const_offset,
                           &row_major, &matrix_type,
                           packing);
   /* array.length() =
    *  max((buffer_object_size - offset_of_array) / stride_of_array, 0)
    */
   ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);

   ir_expression *offset_of_array = new(mem_ctx)
      ir_expression(ir_binop_add, base_offset,
                    new(mem_ctx) ir_constant(const_offset));
   ir_expression *offset_of_array_int = new(mem_ctx)
      ir_expression(ir_unop_u2i, offset_of_array);

   ir_expression *sub = new(mem_ctx)
      ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
   ir_expression *div = new(mem_ctx)
      ir_expression(ir_binop_div, sub,
                    new(mem_ctx) ir_constant(unsized_array_stride));
   ir_expression *max = new(mem_ctx)
      ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));

   return max;
}

void
lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
{
   if (!ir || !ir->lhs)
      return;

   ir_rvalue *rvalue = ir->lhs->as_rvalue();
   if (!rvalue)
      return;

   ir_dereference *deref = ir->lhs->as_dereference();
   if (!deref)
      return;

   ir_variable *var = ir->lhs->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return;

   /* We have a write to a buffer variable, so declare a temporary and rewrite
    * the assignment so that the temporary is the LHS.
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   const glsl_type *type = rvalue->type;
   ir_variable *write_var = new(mem_ctx) ir_variable(type,
                                                     "ssbo_store_temp",
                                                     ir_var_temporary);
   base_ir->insert_before(write_var);
   ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);

   /* Now we have to write the value assigned to the temporary back to memory */
   write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
   progress = true;
}

static bool
is_buffer_backed_variable(ir_variable *var)
{
   return var->is_in_buffer_block() ||
          var->data.mode == ir_var_shader_shared;
}

bool
lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be arrays.
    * FIXME: arrays of arrays?
    */
   if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the array copy into individual element copies to reduce
    * register pressure.
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->length == rhs_deref->type->length);
   void *mem_ctx = ralloc_parent(shader->ir);

   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      ir_dereference *lhs_i =
         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));

      ir_dereference *rhs_i =
         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));
      ir->insert_after(assign(lhs_i, rhs_i));
   }

   ir->remove();
   progress = true;
   return true;
}

bool
lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be records */
   if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the struct copy into individual element copies to reduce
    * register pressure.
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type == rhs_deref->type);
   void *mem_ctx = ralloc_parent(shader->ir);

   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      const char *field_name = lhs_deref->type->fields.structure[i].name;
      ir_dereference *lhs_field =
         new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir_dereference *rhs_field =
         new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir->insert_after(assign(lhs_field, rhs_field));
   }

   ir->remove();
   progress = true;
   return true;
}

ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
{
   /* Array and struct copies could involve large amounts of load/store
    * operations. To reduce register pressure we want to special-case
    * these and split them into individual element copies.
    * This way we avoid emitting all the loads for the RHS first and
    * all the writes for the LHS second, and register usage is more
    * efficient.
    */
   if (check_for_buffer_array_copy(ir))
      return visit_continue_with_parent;

   if (check_for_buffer_struct_copy(ir))
      return visit_continue_with_parent;

   check_ssbo_unsized_array_length_assignment(ir);
   check_for_ssbo_store(ir);
   return rvalue_visit(ir);
}

/* Lowers the intrinsic call to a new internal intrinsic that replaces the
 * buffer-variable access in the first parameter with an offset and block
 * index. This involves creating the new internal intrinsic (i.e. the new
 * function signature).
 */
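/* For instance (hypothetical shader code): a generic atomic lowered from
 * atomicAdd(ssbo_block.counter, 1u) becomes a call to an
 * "__intrinsic_atomic_add_ssbo"-style function, named below by appending
 * "_ssbo" to the generic intrinsic's callee_name(), taking the block index,
 * the byte offset of `counter`, and the operand 1u.
 */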
ir_call *
lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
{
   /* SSBO atomics usually have 2 parameters, the buffer variable and an
    * integer argument. The exception is CompSwap, which has an additional
    * integer parameter.
    */
   int param_count = ir->actual_parameters.length();
   assert(param_count == 2 || param_count == 3);

   /* First argument must be a scalar integer buffer variable */
   exec_node *param = ir->actual_parameters.get_head();
   ir_instruction *inst = (ir_instruction *) param;
   assert(inst->ir_type == ir_type_dereference_variable ||
          inst->ir_type == ir_type_dereference_array ||
          inst->ir_type == ir_type_dereference_record ||
          inst->ir_type == ir_type_swizzle);

   ir_rvalue *deref = (ir_rvalue *) inst;
   assert(deref->type->is_scalar() && deref->type->is_integer());

   ir_variable *var = deref->variable_referenced();
   assert(var);

   /* Compute the offset to the start of the dereference and the
    * block index.
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   const glsl_type *matrix_type;

   enum glsl_interface_packing packing =
      var->get_interface_type()->
         get_internal_ifc_packing(use_std430_as_default);

   this->buffer_access_type = ssbo_atomic_access;
   this->variable = var;

   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_type,
                           packing);
   assert(offset);
   assert(!row_major);
   assert(matrix_type == NULL);

   ir_rvalue *deref_offset =
      add(offset, new(mem_ctx) ir_constant(const_offset));
   ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);

   /* Create the new internal function signature that will take a block
    * index and offset instead of a buffer variable.
    */
   exec_list sig_params;
   ir_variable *sig_param = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref", ir_var_function_in);
   sig_params.push_tail(sig_param);

   sig_param = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset", ir_var_function_in);
   sig_params.push_tail(sig_param);

   const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
      glsl_type::int_type : glsl_type::uint_type;
   sig_param = new(mem_ctx)
      ir_variable(type, "data1", ir_var_function_in);
   sig_params.push_tail(sig_param);

   if (param_count == 3) {
      sig_param = new(mem_ctx)
         ir_variable(type, "data2", ir_var_function_in);
      sig_params.push_tail(sig_param);
   }

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(deref->type,
                                         shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);

   assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
   assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
   sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, ssbo);

   char func_name[64];
   sprintf(func_name, "%s_ssbo", ir->callee_name());
   ir_function *f = new(mem_ctx) ir_function(func_name);
   f->add_signature(sig);

   /* Now, create the call to the internal intrinsic */
   exec_list call_params;
   call_params.push_tail(block_index);
   call_params.push_tail(deref_offset);
   param = ir->actual_parameters.get_head()->get_next();
   ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
   call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   if (param_count == 3) {
      param = param->get_next();
      param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
      call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   }
   ir_dereference_variable *return_deref =
      ir->return_deref->clone(mem_ctx, NULL);
   return new(mem_ctx) ir_call(sig, return_deref, &call_params);
}

ir_call *
lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
{
   exec_list& params = ir->actual_parameters;

   if (params.length() < 2 || params.length() > 3)
      return ir;

   ir_rvalue *rvalue =
      ((ir_instruction *) params.get_head())->as_rvalue();
   if (!rvalue)
      return ir;

   ir_variable *var = rvalue->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return ir;

   const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
   if (id == ir_intrinsic_generic_atomic_add ||
       id == ir_intrinsic_generic_atomic_min ||
       id == ir_intrinsic_generic_atomic_max ||
       id == ir_intrinsic_generic_atomic_and ||
       id == ir_intrinsic_generic_atomic_or ||
       id == ir_intrinsic_generic_atomic_xor ||
       id == ir_intrinsic_generic_atomic_exchange ||
       id == ir_intrinsic_generic_atomic_comp_swap) {
      return lower_ssbo_atomic_intrinsic(ir);
   }

   return ir;
}


ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_call *ir)
{
   ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
   if (new_ir != ir) {
      progress = true;
      base_ir->replace_with(new_ir);
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}


ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_texture *ir)
{
   ir_dereference *sampler = ir->sampler;

   if (sampler->ir_type == ir_type_dereference_record) {
      handle_rvalue((ir_rvalue **)&ir->sampler);
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}


} /* unnamed namespace */

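/* Public entry point of the pass.  A caller (typically the driver's GLSL
 * linking code) is expected to run this once per linked shader stage; it
 * repeatedly walks the stage's IR until no further references are lowered.
 */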
void
lower_ubo_reference(struct gl_linked_shader *shader,
                    bool clamp_block_indices, bool use_std430_as_default)
{
   lower_ubo_reference_visitor v(shader, clamp_block_indices,
                                 use_std430_as_default);

   /* Loop over the instructions lowering references, because taking a deref
    * of a UBO array using a UBO dereference as the index will produce a
    * collection of instructions all of which have cloned UBO dereferences
    * for that array index.
    */
   do {
      v.progress = false;
      visit_list_elements(&v, shader->ir);
   } while (v.progress);
}