/*
 * Copyright (c) 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_shared_reference.cpp
 *
 * IR lower pass to replace dereferences of compute shader shared variables
 * with intrinsic function calls.
 *
 * This relieves drivers of the responsibility of allocating space for the
 * shared variables in the shared memory region.
 */
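
/* A sketch of the transformation (the exact IR depends on the GLSL
 * front-end): given a compute shader containing
 *
 *    shared uint counter;
 *    ...
 *    counter = counter + 1u;
 *
 * this pass assigns "counter" a byte offset within the shared memory
 * region and rewrites its loads and stores into calls to the internal
 * intrinsics __intrinsic_load_shared(offset) and
 * __intrinsic_store_shared(offset, value, write_mask) built below.
 */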

#include "lower_buffer_access.h"
#include "ir_builder.h"
#include "linker.h"
#include "main/macros.h"
#include "util/list.h"
#include "glsl_parser_extras.h"

using namespace ir_builder;

namespace {

struct var_offset {
   struct list_head node;
   const ir_variable *var;
   unsigned offset;
};

class lower_shared_reference_visitor :
      public lower_buffer_access::lower_buffer_access {
public:

   lower_shared_reference_visitor(struct gl_linked_shader *shader)
      : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u)
   {
      list_inithead(&var_offsets);
   }

   ~lower_shared_reference_visitor()
   {
      ralloc_free(list_ctx);
   }

   enum {
      shared_load_access,
      shared_store_access,
      shared_atomic_access,
   } buffer_access_type;

   void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
                             const glsl_type *type, ir_rvalue *offset,
                             unsigned mask, int channel);

   void handle_rvalue(ir_rvalue **rvalue);
   ir_visitor_status visit_enter(ir_assignment *ir);
   void handle_assignment(ir_assignment *ir);

   ir_call *lower_shared_atomic_intrinsic(ir_call *ir);
   ir_call *check_for_shared_atomic_intrinsic(ir_call *ir);
   ir_visitor_status visit_enter(ir_call *ir);

   unsigned get_shared_offset(const ir_variable *);

   ir_call *shared_load(void *mem_ctx, const struct glsl_type *type,
                        ir_rvalue *offset);
   ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
                         unsigned write_mask);

   void *list_ctx;
   struct gl_linked_shader *shader;
   struct list_head var_offsets;
   unsigned shared_size;
   bool progress;
};

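/* Returns the byte offset assigned to the given shared variable,
 * allocating one on first use.  Offsets are handed out greedily in
 * first-use order using std430 alignment and size rules; e.g. a
 * "shared uint" placed first gets offset 0, and a "shared vec4" placed
 * after it is aligned up from byte 4 to offset 16.
 */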
unsigned
lower_shared_reference_visitor::get_shared_offset(const ir_variable *var)
{
   list_for_each_entry(var_offset, var_entry, &var_offsets, node) {
      if (var_entry->var == var)
         return var_entry->offset;
   }

   struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset);
   list_add(&new_entry->node, &var_offsets);
   new_entry->var = var;

   unsigned var_align = var->type->std430_base_alignment(false);
   new_entry->offset = glsl_align(shared_size, var_align);

   unsigned var_size = var->type->std430_size(false);
   shared_size = new_entry->offset + var_size;

   return new_entry->offset;
}

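/* Rewrites a read of a shared variable: the dereference is replaced by a
 * fresh temporary, and emit_access() (in lower_buffer_access) fills that
 * temporary in via the insert_buffer_access() callback below, which emits
 * the __intrinsic_load_shared calls.
 */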
void
lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_dereference *deref = (*rvalue)->as_dereference();
   if (!deref)
      return;

   ir_variable *var = deref->variable_referenced();
   if (!var || var->data.mode != ir_var_shader_shared)
      return;

   buffer_access_type = shared_load_access;

   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset = get_shared_offset(var);
   bool row_major;
   const glsl_type *matrix_type;
   assert(var->get_interface_type() == NULL);
   const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;

   setup_buffer_access(mem_ctx, deref,
                       &offset, &const_offset,
                       &row_major, &matrix_type, NULL, packing);

   /* Now that we've calculated the offset to the start of the
    * dereference, walk over the type and emit loads into a temporary.
    */
   const glsl_type *type = (*rvalue)->type;
   ir_variable *load_var = new(mem_ctx) ir_variable(type,
                                                    "shared_load_temp",
                                                    ir_var_temporary);
   base_ir->insert_before(load_var);

   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                       "shared_load_temp_offset",
                                                       ir_var_temporary);
   base_ir->insert_before(load_offset);
   base_ir->insert_before(assign(load_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(load_var);

   emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major,
               matrix_type, packing, 0);

   *rvalue = deref;

   progress = true;
}

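/* Rewrites a write to a shared variable: the LHS becomes a fresh
 * temporary, and emit_access() then writes that temporary back to shared
 * memory with __intrinsic_store_shared, honoring the original write mask.
 */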
void
lower_shared_reference_visitor::handle_assignment(ir_assignment *ir)
{
   if (!ir || !ir->lhs)
      return;

   ir_rvalue *rvalue = ir->lhs->as_rvalue();
   if (!rvalue)
      return;

   ir_dereference *deref = ir->lhs->as_dereference();
   if (!deref)
      return;

   ir_variable *var = ir->lhs->variable_referenced();
   if (!var || var->data.mode != ir_var_shader_shared)
      return;

   buffer_access_type = shared_store_access;

   /* We have a write to a shared variable, so declare a temporary and rewrite
    * the assignment so that the temporary is the LHS.
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   const glsl_type *type = rvalue->type;
   ir_variable *store_var = new(mem_ctx) ir_variable(type,
                                                     "shared_store_temp",
                                                     ir_var_temporary);
   base_ir->insert_before(store_var);
   ir->lhs = new(mem_ctx) ir_dereference_variable(store_var);

   ir_rvalue *offset = NULL;
   unsigned const_offset = get_shared_offset(var);
   bool row_major;
   const glsl_type *matrix_type;
   assert(var->get_interface_type() == NULL);
   const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;

   setup_buffer_access(mem_ctx, deref,
                       &offset, &const_offset,
                       &row_major, &matrix_type, NULL, packing);

   deref = new(mem_ctx) ir_dereference_variable(store_var);

   ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                        "shared_store_temp_offset",
                                                        ir_var_temporary);
   base_ir->insert_before(store_offset);
   base_ir->insert_before(assign(store_offset, offset));

   /* Now we have to write the value assigned to the temporary back to memory */
   emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major,
               matrix_type, packing, ir->write_mask);

   progress = true;
}

ir_visitor_status
lower_shared_reference_visitor::visit_enter(ir_assignment *ir)
{
   handle_assignment(ir);
   return rvalue_visit(ir);
}

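/* Callback invoked by emit_access() as it walks the accessed type.  Emits
 * the matching load or store intrinsic call for each piece, depending on
 * the access type recorded in buffer_access_type.
 */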
void
lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx,
                                                     ir_dereference *deref,
                                                     const glsl_type *type,
                                                     ir_rvalue *offset,
                                                     unsigned mask,
                                                     int /* channel */)
{
   if (buffer_access_type == shared_store_access) {
      ir_call *store = shared_store(mem_ctx, deref, offset, mask);
      base_ir->insert_after(store);
   } else {
      ir_call *load = shared_load(mem_ctx, type, offset);
      base_ir->insert_before(load);
      ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL);
      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
                                    value));
   }
}

static bool
compute_shader_enabled(const _mesa_glsl_parse_state *state)
{
   return state->stage == MESA_SHADER_COMPUTE;
}

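/* Builds a call to the internal store intrinsic, whose synthesized
 * signature is effectively
 *
 *    void __intrinsic_store_shared(uint offset, T value, uint write_mask);
 *
 * where T is the type of the value being stored.
 */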
ir_call *
lower_shared_reference_visitor::shared_store(void *mem_ctx,
                                             ir_rvalue *deref,
                                             ir_rvalue *offset,
                                             unsigned write_mask)
{
   exec_list sig_params;

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset", ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *val_ref = new(mem_ctx)
      ir_variable(deref->type, "value", ir_var_function_in);
   sig_params.push_tail(val_ref);

   ir_variable *writemask_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "write_mask", ir_var_function_in);
   sig_params.push_tail(writemask_ref);

   ir_function_signature *sig = new(mem_ctx)
      ir_function_signature(glsl_type::void_type, compute_shader_enabled);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->intrinsic_id = ir_intrinsic_shared_store;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared");
   f->add_signature(sig);

   exec_list call_params;
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(deref->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
   return new(mem_ctx) ir_call(sig, NULL, &call_params);
}

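/* Builds a call to the internal load intrinsic, whose synthesized
 * signature is effectively
 *
 *    T __intrinsic_load_shared(uint offset);
 *
 * and stores the result in a fresh temporary.
 */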
ir_call *
lower_shared_reference_visitor::shared_load(void *mem_ctx,
                                            const struct glsl_type *type,
                                            ir_rvalue *offset)
{
   exec_list sig_params;

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset_ref", ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(type, compute_shader_enabled);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->intrinsic_id = ir_intrinsic_shared_load;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared");
   f->add_signature(sig);

   ir_variable *result = new(mem_ctx)
      ir_variable(type, "shared_load_result", ir_var_temporary);
   base_ir->insert_before(result);
   ir_dereference_variable *deref_result = new(mem_ctx)
      ir_dereference_variable(result);

   exec_list call_params;
   call_params.push_tail(offset->clone(mem_ctx, NULL));

   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}

/* Lowers the intrinsic call to a new internal intrinsic that replaces the
 * shared-variable access in the first parameter with an offset.  This
 * involves creating the new internal intrinsic (i.e. the new function
 * signature).
 */
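
/* For example (a sketch; the generic intrinsic names come from the GLSL
 * front-end), a call such as
 *
 *    atomicAdd(counter, 1u)
 *
 * where "counter" is a shared uint becomes something like
 *
 *    __intrinsic_atomic_add_shared(<offset of counter>, 1u)
 */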
ir_call *
lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir)
{
   /* Shared atomics usually have 2 parameters, the shared variable and an
    * integer argument.  The exception is atomicCompSwap, which takes an
    * additional integer parameter.
    */
   int param_count = ir->actual_parameters.length();
   assert(param_count == 2 || param_count == 3);

   /* First argument must be a scalar integer shared variable */
   exec_node *param = ir->actual_parameters.get_head();
   ir_instruction *inst = (ir_instruction *) param;
   assert(inst->ir_type == ir_type_dereference_variable ||
          inst->ir_type == ir_type_dereference_array ||
          inst->ir_type == ir_type_dereference_record ||
          inst->ir_type == ir_type_swizzle);

   ir_rvalue *deref = (ir_rvalue *) inst;
   assert(deref->type->is_scalar() && deref->type->is_integer());

   ir_variable *var = deref->variable_referenced();
   assert(var);

   /* Compute the offset to the start of the dereference */
   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset = get_shared_offset(var);
   bool row_major;
   const glsl_type *matrix_type;
   assert(var->get_interface_type() == NULL);
   const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
   buffer_access_type = shared_atomic_access;

   setup_buffer_access(mem_ctx, deref,
                       &offset, &const_offset,
                       &row_major, &matrix_type, NULL, packing);

   assert(offset);
   assert(!row_major);
   assert(matrix_type == NULL);

   ir_rvalue *deref_offset =
      add(offset, new(mem_ctx) ir_constant(const_offset));

   /* Create the new internal function signature that will take an offset
    * instead of a shared variable
    */
   exec_list sig_params;
   ir_variable *sig_param = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset", ir_var_function_in);
   sig_params.push_tail(sig_param);

   const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
      glsl_type::int_type : glsl_type::uint_type;
   sig_param = new(mem_ctx)
      ir_variable(type, "data1", ir_var_function_in);
   sig_params.push_tail(sig_param);

   if (param_count == 3) {
      sig_param = new(mem_ctx)
         ir_variable(type, "data2", ir_var_function_in);
      sig_params.push_tail(sig_param);
   }

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(deref->type,
                                         compute_shader_enabled);
   assert(sig);
   sig->replace_parameters(&sig_params);

   assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
   assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
   sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, shared);

   char func_name[64];
   snprintf(func_name, sizeof(func_name), "%s_shared", ir->callee_name());
   ir_function *f = new(mem_ctx) ir_function(func_name);
   f->add_signature(sig);

   /* Now, create the call to the internal intrinsic */
   exec_list call_params;
   call_params.push_tail(deref_offset);
   param = ir->actual_parameters.get_head()->get_next();
   ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
   call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   if (param_count == 3) {
      param = param->get_next();
      param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
      call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   }
   ir_dereference_variable *return_deref =
      ir->return_deref->clone(mem_ctx, NULL);
   return new(mem_ctx) ir_call(sig, return_deref, &call_params);
}

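/* If the call is one of the generic atomic intrinsics operating on a
 * shared variable, returns the lowered call; otherwise returns the
 * original call unchanged.
 */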
ir_call *
lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir)
{
   exec_list& params = ir->actual_parameters;

   if (params.length() < 2 || params.length() > 3)
      return ir;

   ir_rvalue *rvalue =
      ((ir_instruction *) params.get_head())->as_rvalue();
   if (!rvalue)
      return ir;

   ir_variable *var = rvalue->variable_referenced();
   if (!var || var->data.mode != ir_var_shader_shared)
      return ir;

   const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
   if (id == ir_intrinsic_generic_atomic_add ||
       id == ir_intrinsic_generic_atomic_min ||
       id == ir_intrinsic_generic_atomic_max ||
       id == ir_intrinsic_generic_atomic_and ||
       id == ir_intrinsic_generic_atomic_or ||
       id == ir_intrinsic_generic_atomic_xor ||
       id == ir_intrinsic_generic_atomic_exchange ||
       id == ir_intrinsic_generic_atomic_comp_swap) {
      return lower_shared_atomic_intrinsic(ir);
   }

   return ir;
}

ir_visitor_status
lower_shared_reference_visitor::visit_enter(ir_call *ir)
{
   ir_call *new_ir = check_for_shared_atomic_intrinsic(ir);
   if (new_ir != ir) {
      progress = true;
      base_ir->replace_with(new_ir);
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}

} /* unnamed namespace */

void
lower_shared_reference(struct gl_context *ctx,
                       struct gl_shader_program *prog,
                       struct gl_linked_shader *shader)
{
   if (shader->Stage != MESA_SHADER_COMPUTE)
      return;

   lower_shared_reference_visitor v(shader);

   /* Loop over the instructions lowering references, because taking a deref
    * of a shared-variable array with another shared-variable dereference as
    * the index produces a collection of instructions, all of which have
    * cloned shared-variable dereferences for that array index.
    */
   do {
      v.progress = false;
      visit_list_elements(&v, shader->ir);
   } while (v.progress);

   prog->Comp.SharedSize = v.shared_size;

   /* Section 19.1 (Compute Shader Variables) of the OpenGL 4.5 (Core Profile)
    * specification says:
    *
    *    "There is a limit to the total size of all variables declared as
    *    shared in a single program object. This limit, expressed in units of
    *    basic machine units, may be queried as the value of
    *    MAX_COMPUTE_SHARED_MEMORY_SIZE."
    */
   if (prog->Comp.SharedSize > ctx->Const.MaxComputeSharedMemorySize) {
      linker_error(prog, "Too much shared memory used (%u/%u)\n",
                   prog->Comp.SharedSize,
                   ctx->Const.MaxComputeSharedMemorySize);
   }
}