1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "compiler/brw_nir.h"
25 #include "compiler/glsl/ir_uniform.h"
26 #include "compiler/nir/nir_builder.h"
27 #include "brw_program.h"
28
29 static void
brw_nir_setup_glsl_builtin_uniform(nir_variable * var,const struct gl_program * prog,struct brw_stage_prog_data * stage_prog_data,bool is_scalar)30 brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
31 const struct gl_program *prog,
32 struct brw_stage_prog_data *stage_prog_data,
33 bool is_scalar)
34 {
35 const nir_state_slot *const slots = var->state_slots;
36 assert(var->state_slots != NULL);
37
38 unsigned uniform_index = var->data.driver_location / 4;
39 for (unsigned int i = 0; i < var->num_state_slots; i++) {
40 /* This state reference has already been setup by ir_to_mesa, but we'll
41 * get the same index back here.
42 */
43 int index = _mesa_add_state_reference(prog->Parameters,
44 slots[i].tokens);
45
46 /* Add each of the unique swizzles of the element as a parameter.
47 * This'll end up matching the expected layout of the
48 * array/matrix/structure we're trying to fill in.
49 */
50 int last_swiz = -1;
51 for (unsigned j = 0; j < 4; j++) {
52 int swiz = GET_SWZ(slots[i].swizzle, j);
53
54 /* If we hit a pair of identical swizzles, this means we've hit the
55 * end of the builtin variable. In scalar mode, we should just quit
56 * and move on to the next one. In vec4, we need to continue and pad
57 * it out to 4 components.
58 */
59 if (swiz == last_swiz && is_scalar)
60 break;
61
62 last_swiz = swiz;
63
64 stage_prog_data->param[uniform_index++] =
65 BRW_PARAM_PARAMETER(index, swiz);
66 }
67 }
68 }
69
70 static void
setup_vec4_image_param(uint32_t * params,uint32_t idx,unsigned offset,unsigned n)71 setup_vec4_image_param(uint32_t *params, uint32_t idx,
72 unsigned offset, unsigned n)
73 {
74 assert(offset % sizeof(uint32_t) == 0);
75 for (unsigned i = 0; i < n; ++i)
76 params[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
77
78 for (unsigned i = n; i < 4; ++i)
79 params[i] = BRW_PARAM_BUILTIN_ZERO;
80 }
81
82 static void
brw_setup_image_uniform_values(nir_variable * var,struct brw_stage_prog_data * prog_data)83 brw_setup_image_uniform_values(nir_variable *var,
84 struct brw_stage_prog_data *prog_data)
85 {
86 unsigned param_start_index = var->data.driver_location / 4;
87 uint32_t *param = &prog_data->param[param_start_index];
88 unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());
89
90 for (unsigned i = 0; i < num_images; i++) {
91 const unsigned image_idx = var->data.binding + i;
92
93 /* Upload the brw_image_param structure. The order is expected to match
94 * the BRW_IMAGE_PARAM_*_OFFSET defines.
95 */
96 setup_vec4_image_param(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
97 image_idx,
98 offsetof(brw_image_param, offset), 2);
99 setup_vec4_image_param(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
100 image_idx,
101 offsetof(brw_image_param, size), 3);
102 setup_vec4_image_param(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
103 image_idx,
104 offsetof(brw_image_param, stride), 4);
105 setup_vec4_image_param(param + BRW_IMAGE_PARAM_TILING_OFFSET,
106 image_idx,
107 offsetof(brw_image_param, tiling), 3);
108 setup_vec4_image_param(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
109 image_idx,
110 offsetof(brw_image_param, swizzling), 2);
111 param += BRW_IMAGE_PARAM_SIZE;
112 }
113 }
114
115 static unsigned
count_uniform_storage_slots(const struct glsl_type * type)116 count_uniform_storage_slots(const struct glsl_type *type)
117 {
118 /* gl_uniform_storage can cope with one level of array, so if the
119 * type is a composite type or an array where each element occupies
120 * more than one slot than we need to recursively process it.
121 */
122 if (glsl_type_is_struct_or_ifc(type)) {
123 unsigned location_count = 0;
124
125 for (unsigned i = 0; i < glsl_get_length(type); i++) {
126 const struct glsl_type *field_type = glsl_get_struct_field(type, i);
127
128 location_count += count_uniform_storage_slots(field_type);
129 }
130
131 return location_count;
132 }
133
134 if (glsl_type_is_array(type)) {
135 const struct glsl_type *element_type = glsl_get_array_element(type);
136
137 if (glsl_type_is_array(element_type) ||
138 glsl_type_is_struct_or_ifc(element_type)) {
139 unsigned element_count = count_uniform_storage_slots(element_type);
140 return element_count * glsl_get_length(type);
141 }
142 }
143
144 return 1;
145 }
146
147 static void
brw_nir_setup_glsl_uniform(gl_shader_stage stage,nir_variable * var,const struct gl_program * prog,struct brw_stage_prog_data * stage_prog_data,bool is_scalar)148 brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
149 const struct gl_program *prog,
150 struct brw_stage_prog_data *stage_prog_data,
151 bool is_scalar)
152 {
153 if (var->type->without_array()->is_sampler())
154 return;
155
156 if (var->type->without_array()->is_image()) {
157 brw_setup_image_uniform_values(var, stage_prog_data);
158 return;
159 }
160
161 /* The data for our (non-builtin) uniforms is stored in a series of
162 * gl_uniform_storage structs for each subcomponent that
163 * glGetUniformLocation() could name. We know it's been set up in the same
164 * order we'd walk the type, so walk the list of storage that matches the
165 * range of slots covered by this variable.
166 */
167 unsigned uniform_index = var->data.driver_location / 4;
168 unsigned num_slots = count_uniform_storage_slots(var->type);
169 for (unsigned u = 0; u < num_slots; u++) {
170 struct gl_uniform_storage *storage =
171 &prog->sh.data->UniformStorage[var->data.location + u];
172
173 /* We already handled samplers and images via the separate top-level
174 * variables created by gl_nir_lower_samplers_as_deref(), but they're
175 * still part of the structure's storage, and so we'll see them while
176 * walking it to set up the other regular fields. Just skip over them.
177 */
178 if (storage->builtin ||
179 storage->type->is_sampler() ||
180 storage->type->is_image())
181 continue;
182
183 gl_constant_value *components = storage->storage;
184 unsigned vector_count = (MAX2(storage->array_elements, 1) *
185 storage->type->matrix_columns);
186 unsigned vector_size = storage->type->vector_elements;
187 unsigned max_vector_size = 4;
188 if (storage->type->base_type == GLSL_TYPE_DOUBLE ||
189 storage->type->base_type == GLSL_TYPE_UINT64 ||
190 storage->type->base_type == GLSL_TYPE_INT64) {
191 vector_size *= 2;
192 if (vector_size > 4)
193 max_vector_size = 8;
194 }
195
196 for (unsigned s = 0; s < vector_count; s++) {
197 unsigned i;
198 for (i = 0; i < vector_size; i++) {
199 uint32_t idx = components - prog->sh.data->UniformDataSlots;
200 stage_prog_data->param[uniform_index++] = BRW_PARAM_UNIFORM(idx);
201 components++;
202 }
203
204 if (!is_scalar) {
205 /* Pad out with zeros if needed (only needed for vec4) */
206 for (; i < max_vector_size; i++) {
207 stage_prog_data->param[uniform_index++] =
208 BRW_PARAM_BUILTIN_ZERO;
209 }
210 }
211 }
212 }
213 }
214
215 void
brw_nir_setup_glsl_uniforms(void * mem_ctx,nir_shader * shader,const struct gl_program * prog,struct brw_stage_prog_data * stage_prog_data,bool is_scalar)216 brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader,
217 const struct gl_program *prog,
218 struct brw_stage_prog_data *stage_prog_data,
219 bool is_scalar)
220 {
221 unsigned nr_params = shader->num_uniforms / 4;
222 stage_prog_data->nr_params = nr_params;
223 stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
224
225 nir_foreach_uniform_variable(var, shader) {
226 /* UBO's, atomics and samplers don't take up space in the
227 uniform file */
228 if (var->interface_type != NULL || var->type->contains_atomic())
229 continue;
230
231 if (var->num_state_slots > 0) {
232 brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data,
233 is_scalar);
234 } else {
235 brw_nir_setup_glsl_uniform(shader->info.stage, var, prog,
236 stage_prog_data, is_scalar);
237 }
238 }
239 }
240
241 void
brw_nir_setup_arb_uniforms(void * mem_ctx,nir_shader * shader,struct gl_program * prog,struct brw_stage_prog_data * stage_prog_data)242 brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
243 struct gl_program *prog,
244 struct brw_stage_prog_data *stage_prog_data)
245 {
246 struct gl_program_parameter_list *plist = prog->Parameters;
247
248 unsigned nr_params = plist->NumParameters * 4;
249 stage_prog_data->nr_params = nr_params;
250 stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
251
252 /* For ARB programs, prog_to_nir generates a single "parameters" variable
253 * for all uniform data. There may be additional sampler variables, and
254 * an extra uniform from nir_lower_wpos_ytransform.
255 */
256
257 for (unsigned p = 0; p < plist->NumParameters; p++) {
258 /* Parameters should be either vec4 uniforms or single component
259 * constants; matrices and other larger types should have been broken
260 * down earlier.
261 */
262 assert(plist->Parameters[p].Size <= 4);
263
264 unsigned i;
265 for (i = 0; i < plist->Parameters[p].Size; i++)
266 stage_prog_data->param[4 * p + i] = BRW_PARAM_PARAMETER(p, i);
267 for (; i < 4; i++)
268 stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO;
269 }
270 }
271
272 static nir_ssa_def *
get_aoa_deref_offset(nir_builder * b,nir_deref_instr * deref,unsigned elem_size)273 get_aoa_deref_offset(nir_builder *b,
274 nir_deref_instr *deref,
275 unsigned elem_size)
276 {
277 unsigned array_size = elem_size;
278 nir_ssa_def *offset = nir_imm_int(b, 0);
279
280 while (deref->deref_type != nir_deref_type_var) {
281 assert(deref->deref_type == nir_deref_type_array);
282
283 /* This level's element size is the previous level's array size */
284 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
285 assert(deref->arr.index.ssa);
286 offset = nir_iadd(b, offset,
287 nir_imul(b, index, nir_imm_int(b, array_size)));
288
289 deref = nir_deref_instr_parent(deref);
290 assert(glsl_type_is_array(deref->type));
291 array_size *= glsl_get_length(deref->type);
292 }
293
294 /* Accessing an invalid surface index with the dataport can result in a
295 * hang. According to the spec "if the index used to select an individual
296 * element is negative or greater than or equal to the size of the array,
297 * the results of the operation are undefined but may not lead to
298 * termination" -- which is one of the possible outcomes of the hang.
299 * Clamp the index to prevent access outside of the array bounds.
300 */
301 return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
302 }
303
304 void
brw_nir_lower_gl_images(nir_shader * shader,const struct gl_program * prog)305 brw_nir_lower_gl_images(nir_shader *shader,
306 const struct gl_program *prog)
307 {
308 /* We put image uniforms at the end */
309 nir_foreach_uniform_variable(var, shader) {
310 if (!var->type->contains_image())
311 continue;
312
313 /* GL Only allows arrays of arrays of images */
314 assert(var->type->without_array()->is_image());
315 const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());
316
317 var->data.driver_location = shader->num_uniforms;
318 shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4;
319 }
320
321 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
322
323 nir_builder b;
324 nir_builder_init(&b, impl);
325
326 nir_foreach_block(block, impl) {
327 nir_foreach_instr_safe(instr, block) {
328 if (instr->type != nir_instr_type_intrinsic)
329 continue;
330
331 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
332 switch (intrin->intrinsic) {
333 case nir_intrinsic_image_deref_load:
334 case nir_intrinsic_image_deref_store:
335 case nir_intrinsic_image_deref_atomic_add:
336 case nir_intrinsic_image_deref_atomic_imin:
337 case nir_intrinsic_image_deref_atomic_umin:
338 case nir_intrinsic_image_deref_atomic_imax:
339 case nir_intrinsic_image_deref_atomic_umax:
340 case nir_intrinsic_image_deref_atomic_and:
341 case nir_intrinsic_image_deref_atomic_or:
342 case nir_intrinsic_image_deref_atomic_xor:
343 case nir_intrinsic_image_deref_atomic_exchange:
344 case nir_intrinsic_image_deref_atomic_comp_swap:
345 case nir_intrinsic_image_deref_size:
346 case nir_intrinsic_image_deref_samples:
347 case nir_intrinsic_image_deref_load_raw_intel:
348 case nir_intrinsic_image_deref_store_raw_intel: {
349 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
350 nir_variable *var = nir_deref_instr_get_variable(deref);
351
352 struct gl_uniform_storage *storage =
353 &prog->sh.data->UniformStorage[var->data.location];
354 const unsigned image_var_idx =
355 storage->opaque[shader->info.stage].index;
356
357 b.cursor = nir_before_instr(&intrin->instr);
358 nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx),
359 get_aoa_deref_offset(&b, deref, 1));
360 nir_rewrite_image_intrinsic(intrin, index, false);
361 break;
362 }
363
364 case nir_intrinsic_image_deref_load_param_intel: {
365 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
366 nir_variable *var = nir_deref_instr_get_variable(deref);
367 const unsigned num_images =
368 MAX2(1, var->type->arrays_of_arrays_size());
369
370 b.cursor = nir_instr_remove(&intrin->instr);
371
372 const unsigned param = nir_intrinsic_base(intrin);
373 nir_ssa_def *offset =
374 get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4);
375 offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16));
376
377 nir_intrinsic_instr *load =
378 nir_intrinsic_instr_create(b.shader,
379 nir_intrinsic_load_uniform);
380 nir_intrinsic_set_base(load, var->data.driver_location);
381 nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4);
382 load->src[0] = nir_src_for_ssa(offset);
383 load->num_components = intrin->dest.ssa.num_components;
384 nir_ssa_dest_init(&load->instr, &load->dest,
385 intrin->dest.ssa.num_components,
386 intrin->dest.ssa.bit_size, NULL);
387 nir_builder_instr_insert(&b, &load->instr);
388
389 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
390 &load->dest.ssa);
391 break;
392 }
393
394 default:
395 break;
396 }
397 }
398 }
399 }
400
401 void
brw_nir_lower_legacy_clipping(nir_shader * nir,int nr_userclip_plane_consts,struct brw_stage_prog_data * prog_data)402 brw_nir_lower_legacy_clipping(nir_shader *nir, int nr_userclip_plane_consts,
403 struct brw_stage_prog_data *prog_data)
404 {
405 if (nr_userclip_plane_consts == 0)
406 return;
407
408 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
409
410 nir_lower_clip_vs(nir, (1 << nr_userclip_plane_consts) - 1, true, false,
411 NULL);
412 nir_lower_io_to_temporaries(nir, impl, true, false);
413 nir_lower_global_vars_to_local(nir);
414 nir_lower_vars_to_ssa(nir);
415
416 const unsigned clip_plane_base = nir->num_uniforms;
417
418 assert(nir->num_uniforms == prog_data->nr_params * 4);
419 const unsigned num_clip_floats = 4 * nr_userclip_plane_consts;
420 uint32_t *clip_param =
421 brw_stage_prog_data_add_params(prog_data, num_clip_floats);
422 nir->num_uniforms += num_clip_floats * sizeof(float);
423 assert(nir->num_uniforms == prog_data->nr_params * 4);
424
425 for (unsigned i = 0; i < num_clip_floats; i++)
426 clip_param[i] = BRW_PARAM_BUILTIN_CLIP_PLANE(i / 4, i % 4);
427
428 nir_builder b;
429 nir_builder_init(&b, impl);
430 nir_foreach_block(block, impl) {
431 nir_foreach_instr_safe(instr, block) {
432 if (instr->type != nir_instr_type_intrinsic)
433 continue;
434
435 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
436 if (intrin->intrinsic != nir_intrinsic_load_user_clip_plane)
437 continue;
438
439 b.cursor = nir_before_instr(instr);
440
441 nir_intrinsic_instr *load =
442 nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
443 load->num_components = 4;
444 load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
445 nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
446 nir_intrinsic_set_base(load, clip_plane_base + 4 * sizeof(float) *
447 nir_intrinsic_ucp_id(intrin));
448 nir_intrinsic_set_range(load, 4 * sizeof(float));
449 nir_builder_instr_insert(&b, &load->instr);
450
451 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
452 &load->dest.ssa);
453 nir_instr_remove(instr);
454 }
455 }
456 }
457