1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file lower_ubo_reference.cpp
26 *
27 * IR lower pass to replace dereferences of variables in a uniform
28 * buffer object with usage of ir_binop_ubo_load expressions, each of
29 * which can read data up to the size of a vec4.
30 *
31 * This relieves drivers of the responsibility to deal with tricky UBO
32 * layout issues like std140 structures and row_major matrices on
33 * their own.
34 */
35
36 #include "lower_buffer_access.h"
37 #include "ir_builder.h"
38 #include "main/macros.h"
39 #include "glsl_parser_extras.h"
40
41 using namespace ir_builder;
42
43 namespace {
44 class lower_ubo_reference_visitor :
45 public lower_buffer_access::lower_buffer_access {
46 public:
lower_ubo_reference_visitor(struct gl_linked_shader * shader,bool clamp_block_indices)47 lower_ubo_reference_visitor(struct gl_linked_shader *shader,
48 bool clamp_block_indices)
49 : shader(shader), clamp_block_indices(clamp_block_indices),
50 struct_field(NULL), variable(NULL)
51 {
52 }
53
54 void handle_rvalue(ir_rvalue **rvalue);
55 ir_visitor_status visit_enter(ir_assignment *ir);
56
57 void setup_for_load_or_store(void *mem_ctx,
58 ir_variable *var,
59 ir_rvalue *deref,
60 ir_rvalue **offset,
61 unsigned *const_offset,
62 bool *row_major,
63 int *matrix_columns,
64 enum glsl_interface_packing packing);
65 uint32_t ssbo_access_params();
66 ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
67 ir_rvalue *offset);
68 ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
69 ir_rvalue *offset);
70
71 bool check_for_buffer_array_copy(ir_assignment *ir);
72 bool check_for_buffer_struct_copy(ir_assignment *ir);
73 void check_for_ssbo_store(ir_assignment *ir);
74 void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var,
75 ir_variable *write_var, unsigned write_mask);
76 ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
77 unsigned write_mask);
78
79 enum {
80 ubo_load_access,
81 ssbo_load_access,
82 ssbo_store_access,
83 ssbo_unsized_array_length_access,
84 ssbo_atomic_access,
85 } buffer_access_type;
86
87 void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
88 const glsl_type *type, ir_rvalue *offset,
89 unsigned mask, int channel);
90
91 ir_visitor_status visit_enter(class ir_expression *);
92 ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
93 void check_ssbo_unsized_array_length_expression(class ir_expression *);
94 void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);
95
96 ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
97 ir_dereference *,
98 ir_variable *);
99 ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);
100
101 unsigned calculate_unsized_array_stride(ir_dereference *deref,
102 enum glsl_interface_packing packing);
103
104 ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
105 ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
106 ir_visitor_status visit_enter(ir_call *ir);
107
108 struct gl_linked_shader *shader;
109 bool clamp_block_indices;
110 const struct glsl_struct_field *struct_field;
111 ir_variable *variable;
112 ir_rvalue *uniform_block;
113 bool progress;
114 };
115
116 /**
117 * Determine the name of the interface block field
118 *
119 * This is the name of the specific member as it would appear in the
120 * \c gl_uniform_buffer_variable::Name field in the shader's
121 * \c UniformBlocks array.
122 */
123 static const char *
interface_field_name(void * mem_ctx,char * base_name,ir_rvalue * d,ir_rvalue ** nonconst_block_index)124 interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
125 ir_rvalue **nonconst_block_index)
126 {
127 *nonconst_block_index = NULL;
128 char *name_copy = NULL;
129 size_t base_length = 0;
130
131 /* Loop back through the IR until we find the uniform block */
132 ir_rvalue *ir = d;
133 while (ir != NULL) {
134 switch (ir->ir_type) {
135 case ir_type_dereference_variable: {
136 /* Exit loop */
137 ir = NULL;
138 break;
139 }
140
141 case ir_type_dereference_record: {
142 ir_dereference_record *r = (ir_dereference_record *) ir;
143 ir = r->record->as_dereference();
144
145 /* If we got here it means any previous array subscripts belong to
146 * block members and not the block itself so skip over them in the
147 * next pass.
148 */
149 d = ir;
150 break;
151 }
152
153 case ir_type_dereference_array: {
154 ir_dereference_array *a = (ir_dereference_array *) ir;
155 ir = a->array->as_dereference();
156 break;
157 }
158
159 case ir_type_swizzle: {
160 ir_swizzle *s = (ir_swizzle *) ir;
161 ir = s->val->as_dereference();
162 /* Skip swizzle in the next pass */
163 d = ir;
164 break;
165 }
166
167 default:
168 assert(!"Should not get here.");
169 break;
170 }
171 }
172
173 while (d != NULL) {
174 switch (d->ir_type) {
175 case ir_type_dereference_variable: {
176 ir_dereference_variable *v = (ir_dereference_variable *) d;
177 if (name_copy != NULL &&
178 v->var->is_interface_instance() &&
179 v->var->type->is_array()) {
180 return name_copy;
181 } else {
182 *nonconst_block_index = NULL;
183 return base_name;
184 }
185
186 break;
187 }
188
189 case ir_type_dereference_array: {
190 ir_dereference_array *a = (ir_dereference_array *) d;
191 size_t new_length;
192
193 if (name_copy == NULL) {
194 name_copy = ralloc_strdup(mem_ctx, base_name);
195 base_length = strlen(name_copy);
196 }
197
198 /* For arrays of arrays we start at the innermost array and work our
199 * way out so we need to insert the subscript at the base of the
200 * name string rather than just attaching it to the end.
201 */
202 new_length = base_length;
203 ir_constant *const_index = a->array_index->as_constant();
204 char *end = ralloc_strdup(NULL, &name_copy[new_length]);
205 if (!const_index) {
206 ir_rvalue *array_index = a->array_index;
207 if (array_index->type != glsl_type::uint_type)
208 array_index = i2u(array_index);
209
210 if (a->array->type->is_array() &&
211 a->array->type->fields.array->is_array()) {
212 ir_constant *base_size = new(mem_ctx)
213 ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
214 array_index = mul(array_index, base_size);
215 }
216
217 if (*nonconst_block_index) {
218 *nonconst_block_index = add(*nonconst_block_index, array_index);
219 } else {
220 *nonconst_block_index = array_index;
221 }
222
223 ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
224 end);
225 } else {
226 ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
227 const_index->get_uint_component(0),
228 end);
229 }
230 ralloc_free(end);
231
232 d = a->array->as_dereference();
233
234 break;
235 }
236
237 default:
238 assert(!"Should not get here.");
239 break;
240 }
241 }
242
243 assert(!"Should not get here.");
244 return NULL;
245 }
246
247 static ir_rvalue *
clamp_to_array_bounds(void * mem_ctx,ir_rvalue * index,const glsl_type * type)248 clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)
249 {
250 assert(type->is_array());
251
252 const unsigned array_size = type->arrays_of_arrays_size();
253
254 ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
255 max_index->type = index->type;
256
257 ir_constant *zero = new(mem_ctx) ir_constant(0);
258 zero->type = index->type;
259
260 if (index->type->base_type == GLSL_TYPE_INT)
261 index = max2(index, zero);
262 index = min2(index, max_index);
263
264 return index;
265 }
266
267 void
setup_for_load_or_store(void * mem_ctx,ir_variable * var,ir_rvalue * deref,ir_rvalue ** offset,unsigned * const_offset,bool * row_major,int * matrix_columns,enum glsl_interface_packing packing)268 lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
269 ir_variable *var,
270 ir_rvalue *deref,
271 ir_rvalue **offset,
272 unsigned *const_offset,
273 bool *row_major,
274 int *matrix_columns,
275 enum glsl_interface_packing packing)
276 {
277 /* Determine the name of the interface block */
278 ir_rvalue *nonconst_block_index;
279 const char *const field_name =
280 interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
281 deref, &nonconst_block_index);
282
283 if (nonconst_block_index && clamp_block_indices) {
284 nonconst_block_index =
285 clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);
286 }
287
288 /* Locate the block by interface name */
289 unsigned num_blocks;
290 struct gl_uniform_block **blocks;
291 if (this->buffer_access_type != ubo_load_access) {
292 num_blocks = shader->Program->info.num_ssbos;
293 blocks = shader->Program->sh.ShaderStorageBlocks;
294 } else {
295 num_blocks = shader->Program->info.num_ubos;
296 blocks = shader->Program->sh.UniformBlocks;
297 }
298 this->uniform_block = NULL;
299 for (unsigned i = 0; i < num_blocks; i++) {
300 if (strcmp(field_name, blocks[i]->Name) == 0) {
301
302 ir_constant *index = new(mem_ctx) ir_constant(i);
303
304 if (nonconst_block_index) {
305 this->uniform_block = add(nonconst_block_index, index);
306 } else {
307 this->uniform_block = index;
308 }
309
310 if (var->is_interface_instance()) {
311 *const_offset = 0;
312 } else {
313 *const_offset = blocks[i]->Uniforms[var->data.location].Offset;
314 }
315
316 break;
317 }
318 }
319
320 assert(this->uniform_block);
321
322 this->struct_field = NULL;
323 setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major,
324 matrix_columns, &this->struct_field, packing);
325 }
326
327 void
handle_rvalue(ir_rvalue ** rvalue)328 lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
329 {
330 if (!*rvalue)
331 return;
332
333 ir_dereference *deref = (*rvalue)->as_dereference();
334 if (!deref)
335 return;
336
337 ir_variable *var = deref->variable_referenced();
338 if (!var || !var->is_in_buffer_block())
339 return;
340
341 void *mem_ctx = ralloc_parent(shader->ir);
342
343 ir_rvalue *offset = NULL;
344 unsigned const_offset;
345 bool row_major;
346 int matrix_columns;
347 enum glsl_interface_packing packing = var->get_interface_type_packing();
348
349 this->buffer_access_type =
350 var->is_in_shader_storage_block() ?
351 ssbo_load_access : ubo_load_access;
352 this->variable = var;
353
354 /* Compute the offset to the start if the dereference as well as other
355 * information we need to configure the write
356 */
357 setup_for_load_or_store(mem_ctx, var, deref,
358 &offset, &const_offset,
359 &row_major, &matrix_columns,
360 packing);
361 assert(offset);
362
363 /* Now that we've calculated the offset to the start of the
364 * dereference, walk over the type and emit loads into a temporary.
365 */
366 const glsl_type *type = (*rvalue)->type;
367 ir_variable *load_var = new(mem_ctx) ir_variable(type,
368 "ubo_load_temp",
369 ir_var_temporary);
370 base_ir->insert_before(load_var);
371
372 ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
373 "ubo_load_temp_offset",
374 ir_var_temporary);
375 base_ir->insert_before(load_offset);
376 base_ir->insert_before(assign(load_offset, offset));
377
378 deref = new(mem_ctx) ir_dereference_variable(load_var);
379 emit_access(mem_ctx, false, deref, load_offset, const_offset,
380 row_major, matrix_columns, packing, 0);
381 *rvalue = deref;
382
383 progress = true;
384 }
385
386 ir_expression *
ubo_load(void * mem_ctx,const glsl_type * type,ir_rvalue * offset)387 lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
388 const glsl_type *type,
389 ir_rvalue *offset)
390 {
391 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
392 return new(mem_ctx)
393 ir_expression(ir_binop_ubo_load,
394 type,
395 block_ref,
396 offset);
397
398 }
399
400 static bool
shader_storage_buffer_object(const _mesa_glsl_parse_state * state)401 shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
402 {
403 return state->has_shader_storage_buffer_objects();
404 }
405
406 uint32_t
ssbo_access_params()407 lower_ubo_reference_visitor::ssbo_access_params()
408 {
409 assert(variable);
410
411 if (variable->is_interface_instance()) {
412 assert(struct_field);
413
414 return ((struct_field->image_coherent ? ACCESS_COHERENT : 0) |
415 (struct_field->image_restrict ? ACCESS_RESTRICT : 0) |
416 (struct_field->image_volatile ? ACCESS_VOLATILE : 0));
417 } else {
418 return ((variable->data.image_coherent ? ACCESS_COHERENT : 0) |
419 (variable->data.image_restrict ? ACCESS_RESTRICT : 0) |
420 (variable->data.image_volatile ? ACCESS_VOLATILE : 0));
421 }
422 }
423
424 ir_call *
ssbo_store(void * mem_ctx,ir_rvalue * deref,ir_rvalue * offset,unsigned write_mask)425 lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
426 ir_rvalue *deref,
427 ir_rvalue *offset,
428 unsigned write_mask)
429 {
430 exec_list sig_params;
431
432 ir_variable *block_ref = new(mem_ctx)
433 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
434 sig_params.push_tail(block_ref);
435
436 ir_variable *offset_ref = new(mem_ctx)
437 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
438 sig_params.push_tail(offset_ref);
439
440 ir_variable *val_ref = new(mem_ctx)
441 ir_variable(deref->type, "value" , ir_var_function_in);
442 sig_params.push_tail(val_ref);
443
444 ir_variable *writemask_ref = new(mem_ctx)
445 ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
446 sig_params.push_tail(writemask_ref);
447
448 ir_variable *access_ref = new(mem_ctx)
449 ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
450 sig_params.push_tail(access_ref);
451
452 ir_function_signature *sig = new(mem_ctx)
453 ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
454 assert(sig);
455 sig->replace_parameters(&sig_params);
456 sig->intrinsic_id = ir_intrinsic_ssbo_store;
457
458 ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
459 f->add_signature(sig);
460
461 exec_list call_params;
462 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
463 call_params.push_tail(offset->clone(mem_ctx, NULL));
464 call_params.push_tail(deref->clone(mem_ctx, NULL));
465 call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
466 call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
467 return new(mem_ctx) ir_call(sig, NULL, &call_params);
468 }
469
470 ir_call *
ssbo_load(void * mem_ctx,const struct glsl_type * type,ir_rvalue * offset)471 lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
472 const struct glsl_type *type,
473 ir_rvalue *offset)
474 {
475 exec_list sig_params;
476
477 ir_variable *block_ref = new(mem_ctx)
478 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
479 sig_params.push_tail(block_ref);
480
481 ir_variable *offset_ref = new(mem_ctx)
482 ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
483 sig_params.push_tail(offset_ref);
484
485 ir_variable *access_ref = new(mem_ctx)
486 ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
487 sig_params.push_tail(access_ref);
488
489 ir_function_signature *sig =
490 new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
491 assert(sig);
492 sig->replace_parameters(&sig_params);
493 sig->intrinsic_id = ir_intrinsic_ssbo_load;
494
495 ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
496 f->add_signature(sig);
497
498 ir_variable *result = new(mem_ctx)
499 ir_variable(type, "ssbo_load_result", ir_var_temporary);
500 base_ir->insert_before(result);
501 ir_dereference_variable *deref_result = new(mem_ctx)
502 ir_dereference_variable(result);
503
504 exec_list call_params;
505 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
506 call_params.push_tail(offset->clone(mem_ctx, NULL));
507 call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
508
509 return new(mem_ctx) ir_call(sig, deref_result, &call_params);
510 }
511
512 void
insert_buffer_access(void * mem_ctx,ir_dereference * deref,const glsl_type * type,ir_rvalue * offset,unsigned mask,int channel)513 lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
514 ir_dereference *deref,
515 const glsl_type *type,
516 ir_rvalue *offset,
517 unsigned mask,
518 int channel)
519 {
520 switch (this->buffer_access_type) {
521 case ubo_load_access:
522 base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
523 ubo_load(mem_ctx, type, offset),
524 mask));
525 break;
526 case ssbo_load_access: {
527 ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
528 base_ir->insert_before(load_ssbo);
529 ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
530 ir_assignment *assignment =
531 assign(deref->clone(mem_ctx, NULL), value, mask);
532 base_ir->insert_before(assignment);
533 break;
534 }
535 case ssbo_store_access:
536 if (channel >= 0) {
537 base_ir->insert_after(ssbo_store(mem_ctx,
538 swizzle(deref, channel, 1),
539 offset, 1));
540 } else {
541 base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
542 }
543 break;
544 default:
545 unreachable("invalid buffer_access_type in insert_buffer_access");
546 }
547 }
548
549 void
write_to_memory(void * mem_ctx,ir_dereference * deref,ir_variable * var,ir_variable * write_var,unsigned write_mask)550 lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
551 ir_dereference *deref,
552 ir_variable *var,
553 ir_variable *write_var,
554 unsigned write_mask)
555 {
556 ir_rvalue *offset = NULL;
557 unsigned const_offset;
558 bool row_major;
559 int matrix_columns;
560 enum glsl_interface_packing packing = var->get_interface_type_packing();
561
562 this->buffer_access_type = ssbo_store_access;
563 this->variable = var;
564
565 /* Compute the offset to the start if the dereference as well as other
566 * information we need to configure the write
567 */
568 setup_for_load_or_store(mem_ctx, var, deref,
569 &offset, &const_offset,
570 &row_major, &matrix_columns,
571 packing);
572 assert(offset);
573
574 /* Now emit writes from the temporary to memory */
575 ir_variable *write_offset =
576 new(mem_ctx) ir_variable(glsl_type::uint_type,
577 "ssbo_store_temp_offset",
578 ir_var_temporary);
579
580 base_ir->insert_before(write_offset);
581 base_ir->insert_before(assign(write_offset, offset));
582
583 deref = new(mem_ctx) ir_dereference_variable(write_var);
584 emit_access(mem_ctx, true, deref, write_offset, const_offset,
585 row_major, matrix_columns, packing, write_mask);
586 }
587
588 ir_visitor_status
visit_enter(ir_expression * ir)589 lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
590 {
591 check_ssbo_unsized_array_length_expression(ir);
592 return rvalue_visit(ir);
593 }
594
595 ir_expression *
calculate_ssbo_unsized_array_length(ir_expression * expr)596 lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
597 {
598 if (expr->operation !=
599 ir_expression_operation(ir_unop_ssbo_unsized_array_length))
600 return NULL;
601
602 ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
603 if (!rvalue ||
604 !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
605 return NULL;
606
607 ir_dereference *deref = expr->operands[0]->as_dereference();
608 if (!deref)
609 return NULL;
610
611 ir_variable *var = expr->operands[0]->variable_referenced();
612 if (!var || !var->is_in_shader_storage_block())
613 return NULL;
614 return process_ssbo_unsized_array_length(&rvalue, deref, var);
615 }
616
617 void
check_ssbo_unsized_array_length_expression(ir_expression * ir)618 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
619 {
620 if (ir->operation ==
621 ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
622 /* Don't replace this unop if it is found alone. It is going to be
623 * removed by the optimization passes or replaced if it is part of
624 * an ir_assignment or another ir_expression.
625 */
626 return;
627 }
628
629 for (unsigned i = 0; i < ir->get_num_operands(); i++) {
630 if (ir->operands[i]->ir_type != ir_type_expression)
631 continue;
632 ir_expression *expr = (ir_expression *) ir->operands[i];
633 ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
634 if (!temp)
635 continue;
636
637 delete expr;
638 ir->operands[i] = temp;
639 }
640 }
641
642 void
check_ssbo_unsized_array_length_assignment(ir_assignment * ir)643 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
644 {
645 if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
646 return;
647
648 ir_expression *expr = (ir_expression *) ir->rhs;
649 ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
650 if (!temp)
651 return;
652
653 delete expr;
654 ir->rhs = temp;
655 return;
656 }
657
658 ir_expression *
emit_ssbo_get_buffer_size(void * mem_ctx)659 lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx)
660 {
661 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
662 return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
663 glsl_type::int_type,
664 block_ref);
665 }
666
667 unsigned
calculate_unsized_array_stride(ir_dereference * deref,enum glsl_interface_packing packing)668 lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
669 enum glsl_interface_packing packing)
670 {
671 unsigned array_stride = 0;
672
673 switch (deref->ir_type) {
674 case ir_type_dereference_variable:
675 {
676 ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
677 const struct glsl_type *unsized_array_type = NULL;
678 /* An unsized array can be sized by other lowering passes, so pick
679 * the first field of the array which has the data type of the unsized
680 * array.
681 */
682 unsized_array_type = deref_var->var->type->fields.array;
683
684 /* Whether or not the field is row-major (because it might be a
685 * bvec2 or something) does not affect the array itself. We need
686 * to know whether an array element in its entirety is row-major.
687 */
688 const bool array_row_major =
689 is_dereferenced_thing_row_major(deref_var);
690
691 if (packing == GLSL_INTERFACE_PACKING_STD430) {
692 array_stride = unsized_array_type->std430_array_stride(array_row_major);
693 } else {
694 array_stride = unsized_array_type->std140_size(array_row_major);
695 array_stride = glsl_align(array_stride, 16);
696 }
697 break;
698 }
699 case ir_type_dereference_record:
700 {
701 ir_dereference_record *deref_record = (ir_dereference_record *) deref;
702 ir_dereference *interface_deref =
703 deref_record->record->as_dereference();
704 assert(interface_deref != NULL);
705 const struct glsl_type *interface_type = interface_deref->type;
706 unsigned record_length = interface_type->length;
707 /* Unsized array is always the last element of the interface */
708 const struct glsl_type *unsized_array_type =
709 interface_type->fields.structure[record_length - 1].type->fields.array;
710
711 const bool array_row_major =
712 is_dereferenced_thing_row_major(deref_record);
713
714 if (packing == GLSL_INTERFACE_PACKING_STD430) {
715 array_stride = unsized_array_type->std430_array_stride(array_row_major);
716 } else {
717 array_stride = unsized_array_type->std140_size(array_row_major);
718 array_stride = glsl_align(array_stride, 16);
719 }
720 break;
721 }
722 default:
723 unreachable("Unsupported dereference type");
724 }
725 return array_stride;
726 }
727
728 ir_expression *
process_ssbo_unsized_array_length(ir_rvalue ** rvalue,ir_dereference * deref,ir_variable * var)729 lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
730 ir_dereference *deref,
731 ir_variable *var)
732 {
733 void *mem_ctx = ralloc_parent(*rvalue);
734
735 ir_rvalue *base_offset = NULL;
736 unsigned const_offset;
737 bool row_major;
738 int matrix_columns;
739 enum glsl_interface_packing packing = var->get_interface_type_packing();
740 int unsized_array_stride = calculate_unsized_array_stride(deref, packing);
741
742 this->buffer_access_type = ssbo_unsized_array_length_access;
743 this->variable = var;
744
745 /* Compute the offset to the start if the dereference as well as other
746 * information we need to calculate the length.
747 */
748 setup_for_load_or_store(mem_ctx, var, deref,
749 &base_offset, &const_offset,
750 &row_major, &matrix_columns,
751 packing);
752 /* array.length() =
753 * max((buffer_object_size - offset_of_array) / stride_of_array, 0)
754 */
755 ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);
756
757 ir_expression *offset_of_array = new(mem_ctx)
758 ir_expression(ir_binop_add, base_offset,
759 new(mem_ctx) ir_constant(const_offset));
760 ir_expression *offset_of_array_int = new(mem_ctx)
761 ir_expression(ir_unop_u2i, offset_of_array);
762
763 ir_expression *sub = new(mem_ctx)
764 ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
765 ir_expression *div = new(mem_ctx)
766 ir_expression(ir_binop_div, sub,
767 new(mem_ctx) ir_constant(unsized_array_stride));
768 ir_expression *max = new(mem_ctx)
769 ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));
770
771 return max;
772 }
773
774 void
check_for_ssbo_store(ir_assignment * ir)775 lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
776 {
777 if (!ir || !ir->lhs)
778 return;
779
780 ir_rvalue *rvalue = ir->lhs->as_rvalue();
781 if (!rvalue)
782 return;
783
784 ir_dereference *deref = ir->lhs->as_dereference();
785 if (!deref)
786 return;
787
788 ir_variable *var = ir->lhs->variable_referenced();
789 if (!var || !var->is_in_shader_storage_block())
790 return;
791
792 /* We have a write to a buffer variable, so declare a temporary and rewrite
793 * the assignment so that the temporary is the LHS.
794 */
795 void *mem_ctx = ralloc_parent(shader->ir);
796
797 const glsl_type *type = rvalue->type;
798 ir_variable *write_var = new(mem_ctx) ir_variable(type,
799 "ssbo_store_temp",
800 ir_var_temporary);
801 base_ir->insert_before(write_var);
802 ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);
803
804 /* Now we have to write the value assigned to the temporary back to memory */
805 write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
806 progress = true;
807 }
808
809 static bool
is_buffer_backed_variable(ir_variable * var)810 is_buffer_backed_variable(ir_variable *var)
811 {
812 return var->is_in_buffer_block() ||
813 var->data.mode == ir_var_shader_shared;
814 }
815
816 bool
check_for_buffer_array_copy(ir_assignment * ir)817 lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
818 {
819 if (!ir || !ir->lhs || !ir->rhs)
820 return false;
821
822 /* LHS and RHS must be arrays
823 * FIXME: arrays of arrays?
824 */
825 if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
826 return false;
827
828 /* RHS must be a buffer-backed variable. This is what can cause the problem
829 * since it would lead to a series of loads that need to live until we
830 * see the writes to the LHS.
831 */
832 ir_variable *rhs_var = ir->rhs->variable_referenced();
833 if (!rhs_var || !is_buffer_backed_variable(rhs_var))
834 return false;
835
836 /* Split the array copy into individual element copies to reduce
837 * register pressure
838 */
839 ir_dereference *rhs_deref = ir->rhs->as_dereference();
840 if (!rhs_deref)
841 return false;
842
843 ir_dereference *lhs_deref = ir->lhs->as_dereference();
844 if (!lhs_deref)
845 return false;
846
847 assert(lhs_deref->type->length == rhs_deref->type->length);
848 void *mem_ctx = ralloc_parent(shader->ir);
849
850 for (unsigned i = 0; i < lhs_deref->type->length; i++) {
851 ir_dereference *lhs_i =
852 new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
853 new(mem_ctx) ir_constant(i));
854
855 ir_dereference *rhs_i =
856 new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
857 new(mem_ctx) ir_constant(i));
858 ir->insert_after(assign(lhs_i, rhs_i));
859 }
860
861 ir->remove();
862 progress = true;
863 return true;
864 }
865
866 bool
check_for_buffer_struct_copy(ir_assignment * ir)867 lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
868 {
869 if (!ir || !ir->lhs || !ir->rhs)
870 return false;
871
872 /* LHS and RHS must be records */
873 if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record())
874 return false;
875
876 /* RHS must be a buffer-backed variable. This is what can cause the problem
877 * since it would lead to a series of loads that need to live until we
878 * see the writes to the LHS.
879 */
880 ir_variable *rhs_var = ir->rhs->variable_referenced();
881 if (!rhs_var || !is_buffer_backed_variable(rhs_var))
882 return false;
883
884 /* Split the struct copy into individual element copies to reduce
885 * register pressure
886 */
887 ir_dereference *rhs_deref = ir->rhs->as_dereference();
888 if (!rhs_deref)
889 return false;
890
891 ir_dereference *lhs_deref = ir->lhs->as_dereference();
892 if (!lhs_deref)
893 return false;
894
895 assert(lhs_deref->type->record_compare(rhs_deref->type));
896 void *mem_ctx = ralloc_parent(shader->ir);
897
898 for (unsigned i = 0; i < lhs_deref->type->length; i++) {
899 const char *field_name = lhs_deref->type->fields.structure[i].name;
900 ir_dereference *lhs_field =
901 new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
902 field_name);
903 ir_dereference *rhs_field =
904 new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
905 field_name);
906 ir->insert_after(assign(lhs_field, rhs_field));
907 }
908
909 ir->remove();
910 progress = true;
911 return true;
912 }
913
914 ir_visitor_status
visit_enter(ir_assignment * ir)915 lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
916 {
917 /* Array and struct copies could involve large amounts of load/store
918 * operations. To improve register pressure we want to special-case
919 * these and split them into individual element copies.
920 * This way we avoid emitting all the loads for the RHS first and
921 * all the writes for the LHS second and register usage is more
922 * efficient.
923 */
924 if (check_for_buffer_array_copy(ir))
925 return visit_continue_with_parent;
926
927 if (check_for_buffer_struct_copy(ir))
928 return visit_continue_with_parent;
929
930 check_ssbo_unsized_array_length_assignment(ir);
931 check_for_ssbo_store(ir);
932 return rvalue_visit(ir);
933 }
934
935 /* Lowers the intrinsic call to a new internal intrinsic that swaps the
936 * access to the buffer variable in the first parameter by an offset
937 * and block index. This involves creating the new internal intrinsic
938 * (i.e. the new function signature).
939 */
940 ir_call *
lower_ssbo_atomic_intrinsic(ir_call * ir)941 lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
942 {
943 /* SSBO atomics usually have 2 parameters, the buffer variable and an
944 * integer argument. The exception is CompSwap, that has an additional
945 * integer parameter.
946 */
947 int param_count = ir->actual_parameters.length();
948 assert(param_count == 2 || param_count == 3);
949
950 /* First argument must be a scalar integer buffer variable */
951 exec_node *param = ir->actual_parameters.get_head();
952 ir_instruction *inst = (ir_instruction *) param;
953 assert(inst->ir_type == ir_type_dereference_variable ||
954 inst->ir_type == ir_type_dereference_array ||
955 inst->ir_type == ir_type_dereference_record ||
956 inst->ir_type == ir_type_swizzle);
957
958 ir_rvalue *deref = (ir_rvalue *) inst;
959 assert(deref->type->is_scalar() && deref->type->is_integer());
960
961 ir_variable *var = deref->variable_referenced();
962 assert(var);
963
964 /* Compute the offset to the start if the dereference and the
965 * block index
966 */
967 void *mem_ctx = ralloc_parent(shader->ir);
968
969 ir_rvalue *offset = NULL;
970 unsigned const_offset;
971 bool row_major;
972 int matrix_columns;
973 enum glsl_interface_packing packing = var->get_interface_type_packing();
974
975 this->buffer_access_type = ssbo_atomic_access;
976 this->variable = var;
977
978 setup_for_load_or_store(mem_ctx, var, deref,
979 &offset, &const_offset,
980 &row_major, &matrix_columns,
981 packing);
982 assert(offset);
983 assert(!row_major);
984 assert(matrix_columns == 1);
985
986 ir_rvalue *deref_offset =
987 add(offset, new(mem_ctx) ir_constant(const_offset));
988 ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
989
990 /* Create the new internal function signature that will take a block
991 * index and offset instead of a buffer variable
992 */
993 exec_list sig_params;
994 ir_variable *sig_param = new(mem_ctx)
995 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
996 sig_params.push_tail(sig_param);
997
998 sig_param = new(mem_ctx)
999 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
1000 sig_params.push_tail(sig_param);
1001
1002 const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
1003 glsl_type::int_type : glsl_type::uint_type;
1004 sig_param = new(mem_ctx)
1005 ir_variable(type, "data1", ir_var_function_in);
1006 sig_params.push_tail(sig_param);
1007
1008 if (param_count == 3) {
1009 sig_param = new(mem_ctx)
1010 ir_variable(type, "data2", ir_var_function_in);
1011 sig_params.push_tail(sig_param);
1012 }
1013
1014 ir_function_signature *sig =
1015 new(mem_ctx) ir_function_signature(deref->type,
1016 shader_storage_buffer_object);
1017 assert(sig);
1018 sig->replace_parameters(&sig_params);
1019
1020 assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
1021 assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
1022 sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, ssbo);
1023
1024 char func_name[64];
1025 sprintf(func_name, "%s_ssbo", ir->callee_name());
1026 ir_function *f = new(mem_ctx) ir_function(func_name);
1027 f->add_signature(sig);
1028
1029 /* Now, create the call to the internal intrinsic */
1030 exec_list call_params;
1031 call_params.push_tail(block_index);
1032 call_params.push_tail(deref_offset);
1033 param = ir->actual_parameters.get_head()->get_next();
1034 ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1035 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1036 if (param_count == 3) {
1037 param = param->get_next();
1038 param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1039 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1040 }
1041 ir_dereference_variable *return_deref =
1042 ir->return_deref->clone(mem_ctx, NULL);
1043 return new(mem_ctx) ir_call(sig, return_deref, &call_params);
1044 }
1045
1046 ir_call *
check_for_ssbo_atomic_intrinsic(ir_call * ir)1047 lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
1048 {
1049 exec_list& params = ir->actual_parameters;
1050
1051 if (params.length() < 2 || params.length() > 3)
1052 return ir;
1053
1054 ir_rvalue *rvalue =
1055 ((ir_instruction *) params.get_head())->as_rvalue();
1056 if (!rvalue)
1057 return ir;
1058
1059 ir_variable *var = rvalue->variable_referenced();
1060 if (!var || !var->is_in_shader_storage_block())
1061 return ir;
1062
1063 const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
1064 if (id == ir_intrinsic_generic_atomic_add ||
1065 id == ir_intrinsic_generic_atomic_min ||
1066 id == ir_intrinsic_generic_atomic_max ||
1067 id == ir_intrinsic_generic_atomic_and ||
1068 id == ir_intrinsic_generic_atomic_or ||
1069 id == ir_intrinsic_generic_atomic_xor ||
1070 id == ir_intrinsic_generic_atomic_exchange ||
1071 id == ir_intrinsic_generic_atomic_comp_swap) {
1072 return lower_ssbo_atomic_intrinsic(ir);
1073 }
1074
1075 return ir;
1076 }
1077
1078
1079 ir_visitor_status
visit_enter(ir_call * ir)1080 lower_ubo_reference_visitor::visit_enter(ir_call *ir)
1081 {
1082 ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
1083 if (new_ir != ir) {
1084 progress = true;
1085 base_ir->replace_with(new_ir);
1086 return visit_continue_with_parent;
1087 }
1088
1089 return rvalue_visit(ir);
1090 }
1091
1092
1093 } /* unnamed namespace */
1094
1095 void
lower_ubo_reference(struct gl_linked_shader * shader,bool clamp_block_indices)1096 lower_ubo_reference(struct gl_linked_shader *shader, bool clamp_block_indices)
1097 {
1098 lower_ubo_reference_visitor v(shader, clamp_block_indices);
1099
1100 /* Loop over the instructions lowering references, because we take
1101 * a deref of a UBO array using a UBO dereference as the index will
1102 * produce a collection of instructions all of which have cloned
1103 * UBO dereferences for that array index.
1104 */
1105 do {
1106 v.progress = false;
1107 visit_list_elements(&v, shader->ir);
1108 } while (v.progress);
1109 }
1110