1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /*
25  * This lowering pass converts loads and stores of input/output variables,
26  * expressed as derefs, into the corresponding input/output intrinsics.
27 */
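
/*
 * Typical driver usage (a minimal sketch, not part of this file; the
 * type-size callback name is illustrative and any function matching the
 * "int (*)(const struct glsl_type *, bool)" signature works):
 *
 *    static int
 *    vec4_slot_type_size(const struct glsl_type *type, bool bindless)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    nir_assign_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
 *                             vec4_slot_type_size);
 *    nir_assign_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
 *                             vec4_slot_type_size);
 *    NIR_PASS(_, nir, nir_lower_io,
 *             nir_var_shader_in | nir_var_shader_out,
 *             vec4_slot_type_size, (nir_lower_io_options)0);
 */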
28
29 #include "nir.h"
30 #include "nir_builder.h"
31 #include "nir_deref.h"
32 #include "nir_xfb_info.h"
33
34 #include "util/u_math.h"
35
36 struct lower_io_state {
37 void *dead_ctx;
38 nir_builder builder;
39 int (*type_size)(const struct glsl_type *type, bool);
40 nir_variable_mode modes;
41 nir_lower_io_options options;
42 };
43
44 static nir_intrinsic_op
45 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
46 {
47 switch (deref_op) {
48 case nir_intrinsic_deref_atomic:
49 return nir_intrinsic_ssbo_atomic;
50 case nir_intrinsic_deref_atomic_swap:
51 return nir_intrinsic_ssbo_atomic_swap;
52 default:
53 unreachable("Invalid SSBO atomic");
54 }
55 }
56
57 static nir_intrinsic_op
58 global_atomic_for_deref(nir_address_format addr_format,
59 nir_intrinsic_op deref_op)
60 {
61 switch (deref_op) {
62 case nir_intrinsic_deref_atomic:
63 if (addr_format != nir_address_format_2x32bit_global)
64 return nir_intrinsic_global_atomic;
65 else
66 return nir_intrinsic_global_atomic_2x32;
67
68 case nir_intrinsic_deref_atomic_swap:
69 if (addr_format != nir_address_format_2x32bit_global)
70 return nir_intrinsic_global_atomic_swap;
71 else
72 return nir_intrinsic_global_atomic_swap_2x32;
73
74 default:
75       unreachable("Invalid global atomic");
76 }
77 }
78
79 static nir_intrinsic_op
80 shared_atomic_for_deref(nir_intrinsic_op deref_op)
81 {
82 switch (deref_op) {
83 case nir_intrinsic_deref_atomic:
84 return nir_intrinsic_shared_atomic;
85 case nir_intrinsic_deref_atomic_swap:
86 return nir_intrinsic_shared_atomic_swap;
87 default:
88 unreachable("Invalid shared atomic");
89 }
90 }
91
92 static nir_intrinsic_op
93 task_payload_atomic_for_deref(nir_intrinsic_op deref_op)
94 {
95 switch (deref_op) {
96 case nir_intrinsic_deref_atomic:
97 return nir_intrinsic_task_payload_atomic;
98 case nir_intrinsic_deref_atomic_swap:
99 return nir_intrinsic_task_payload_atomic_swap;
100 default:
101 unreachable("Invalid task payload atomic");
102 }
103 }
104
105 void
106 nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
107 unsigned *size,
108 int (*type_size)(const struct glsl_type *, bool))
109 {
110 unsigned location = 0;
111
112 nir_foreach_variable_with_modes(var, shader, mode) {
113 var->data.driver_location = location;
114 bool bindless_type_size = var->data.mode == nir_var_shader_in ||
115 var->data.mode == nir_var_shader_out ||
116 var->data.bindless;
117 location += type_size(var->type, bindless_type_size);
118 }
119
120 *size = location;
121 }
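
/* Worked example (illustrative): with a type_size callback that counts vec4
 * slots, variables of type vec4, mat4 and float are assigned
 * driver_location 0, 1 and 5 respectively, and *size ends up as 6.
 */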
122
123 /**
124 * Some inputs and outputs are arrayed, meaning that there is an extra level
125 * of array indexing to handle mismatches between the shader interface and the
126 * dispatch pattern of the shader. For instance, geometry shaders are
127 * executed per-primitive while their inputs and outputs are specified
128 * per-vertex so all inputs and outputs have to be additionally indexed with
129 * the vertex index within the primitive.
130 */
131 bool
132 nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
133 {
134 if (var->data.patch || !glsl_type_is_array(var->type))
135 return false;
136
137 if (stage == MESA_SHADER_MESH) {
138       /* NV_mesh_shader: this is a flat array for the whole workgroup. */
139 if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
140 return var->data.per_primitive;
141 }
142
143 if (var->data.mode == nir_var_shader_in) {
144 if (var->data.per_vertex) {
145 assert(stage == MESA_SHADER_FRAGMENT);
146 return true;
147 }
148
149 return stage == MESA_SHADER_GEOMETRY ||
150 stage == MESA_SHADER_TESS_CTRL ||
151 stage == MESA_SHADER_TESS_EVAL;
152 }
153
154 if (var->data.mode == nir_var_shader_out)
155 return stage == MESA_SHADER_TESS_CTRL ||
156 stage == MESA_SHADER_MESH;
157
158 return false;
159 }
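
/* Examples (illustrative): a geometry shader input declared in GLSL as
 * "in vec4 color[]" is arrayed (it is indexed by the vertex within the
 * input primitive), so this returns true and callers peel off the outer
 * array dimension.  A tessellation control shader "patch out" variable is
 * not arrayed because var->data.patch is set, so it returns false.
 */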
160
161 static bool
162 uses_high_dvec2_semantic(struct lower_io_state *state,
163 const nir_variable *var)
164 {
165 return state->builder.shader->info.stage == MESA_SHADER_VERTEX &&
166 state->options & nir_lower_io_lower_64bit_to_32_new &&
167 var->data.mode == nir_var_shader_in &&
168 glsl_type_is_dual_slot(glsl_without_array(var->type));
169 }
170
171 static unsigned
172 get_number_of_slots(struct lower_io_state *state,
173 const nir_variable *var)
174 {
175 const struct glsl_type *type = var->type;
176
177 if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
178 assert(glsl_type_is_array(type));
179 type = glsl_get_array_element(type);
180 }
181
182 /* NV_mesh_shader:
183 * PRIMITIVE_INDICES is a flat array, not a proper arrayed output,
184 * as opposed to D3D-style mesh shaders where it's addressed by
185 * the primitive index.
186 * Prevent assigning several slots to primitive indices,
187 * to avoid some issues.
188 */
189 if (state->builder.shader->info.stage == MESA_SHADER_MESH &&
190 var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
191 !nir_is_arrayed_io(var, state->builder.shader->info.stage))
192 return 1;
193
194 return state->type_size(type, var->data.bindless) /
195 (uses_high_dvec2_semantic(state, var) ? 2 : 1);
196 }
197
198 static nir_def *
199 get_io_offset(nir_builder *b, nir_deref_instr *deref,
200 nir_def **array_index,
201 int (*type_size)(const struct glsl_type *, bool),
202 unsigned *component, bool bts)
203 {
204 nir_deref_path path;
205 nir_deref_path_init(&path, deref, NULL);
206
207 assert(path.path[0]->deref_type == nir_deref_type_var);
208 nir_deref_instr **p = &path.path[1];
209
210 /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
211 * inputs), skip the outermost array index. Process the rest normally.
212 */
213 if (array_index != NULL) {
214 assert((*p)->deref_type == nir_deref_type_array);
215 *array_index = (*p)->arr.index.ssa;
216 p++;
217 }
218
219 if (path.path[0]->var->data.compact && nir_src_is_const((*p)->arr.index)) {
220 assert((*p)->deref_type == nir_deref_type_array);
221 assert(glsl_type_is_scalar((*p)->type));
222
223 /* We always lower indirect dereferences for "compact" array vars. */
224 const unsigned index = nir_src_as_uint((*p)->arr.index);
225 const unsigned total_offset = *component + index;
226 const unsigned slot_offset = total_offset / 4;
227 *component = total_offset % 4;
228 return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
229 }
230
231 /* Just emit code and let constant-folding go to town */
232 nir_def *offset = nir_imm_int(b, 0);
233
234 for (; *p; p++) {
235 if ((*p)->deref_type == nir_deref_type_array) {
236 unsigned size = type_size((*p)->type, bts);
237
238 nir_def *mul =
239 nir_amul_imm(b, (*p)->arr.index.ssa, size);
240
241 offset = nir_iadd(b, offset, mul);
242 } else if ((*p)->deref_type == nir_deref_type_struct) {
243 /* p starts at path[1], so this is safe */
244 nir_deref_instr *parent = *(p - 1);
245
246 unsigned field_offset = 0;
247 for (unsigned i = 0; i < (*p)->strct.index; i++) {
248 field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
249 }
250 offset = nir_iadd_imm(b, offset, field_offset);
251 } else {
252 unreachable("Unsupported deref type");
253 }
254 }
255
256 nir_deref_path_finish(&path);
257
258 return offset;
259 }
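
/* Worked example (illustrative, assuming a vec4-slot type_size callback and
 * a non-compact, non-arrayed variable): for a deref chain equivalent to
 * m[i][2] on a variable of type mat4[2], the loop above produces
 * offset = 0 + i * type_size(mat4) + 2 * type_size(vec4) = i * 4 + 2,
 * i.e. a slot count relative to the variable's driver_location.
 */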
260
261 static nir_def *
262 emit_load(struct lower_io_state *state,
263 nir_def *array_index, nir_variable *var, nir_def *offset,
264 unsigned component, unsigned num_components, unsigned bit_size,
265 nir_alu_type dest_type, bool high_dvec2)
266 {
267 nir_builder *b = &state->builder;
268 const nir_shader *nir = b->shader;
269 nir_variable_mode mode = var->data.mode;
270 nir_def *barycentric = NULL;
271
272 nir_intrinsic_op op;
273 switch (mode) {
274 case nir_var_shader_in:
275 if (nir->info.stage == MESA_SHADER_FRAGMENT &&
276 nir->options->use_interpolated_input_intrinsics &&
277 var->data.interpolation != INTERP_MODE_FLAT &&
278 !var->data.per_primitive) {
279 if (var->data.interpolation == INTERP_MODE_EXPLICIT ||
280 var->data.per_vertex) {
281 assert(array_index != NULL);
282 op = nir_intrinsic_load_input_vertex;
283 } else {
284 assert(array_index == NULL);
285
286 nir_intrinsic_op bary_op;
287 if (var->data.sample)
288 bary_op = nir_intrinsic_load_barycentric_sample;
289 else if (var->data.centroid)
290 bary_op = nir_intrinsic_load_barycentric_centroid;
291 else
292 bary_op = nir_intrinsic_load_barycentric_pixel;
293
294 barycentric = nir_load_barycentric(&state->builder, bary_op,
295 var->data.interpolation);
296 op = nir_intrinsic_load_interpolated_input;
297 }
298 } else {
299 op = array_index ? nir_intrinsic_load_per_vertex_input : nir_intrinsic_load_input;
300 }
301 break;
302 case nir_var_shader_out:
303 op = !array_index ? nir_intrinsic_load_output : var->data.per_primitive ? nir_intrinsic_load_per_primitive_output
304 : nir_intrinsic_load_per_vertex_output;
305 break;
306 case nir_var_uniform:
307 op = nir_intrinsic_load_uniform;
308 break;
309 default:
310 unreachable("Unknown variable mode");
311 }
312
313 nir_intrinsic_instr *load =
314 nir_intrinsic_instr_create(state->builder.shader, op);
315 load->num_components = num_components;
316
317 nir_intrinsic_set_base(load, var->data.driver_location);
318 if (nir_intrinsic_has_range(load)) {
319 const struct glsl_type *type = var->type;
320 if (array_index)
321 type = glsl_get_array_element(type);
322 unsigned var_size = state->type_size(type, var->data.bindless);
323 nir_intrinsic_set_range(load, var_size);
324 }
325
326 if (mode == nir_var_shader_in || mode == nir_var_shader_out)
327 nir_intrinsic_set_component(load, component);
328
329 if (nir_intrinsic_has_access(load))
330 nir_intrinsic_set_access(load, var->data.access);
331
332 nir_intrinsic_set_dest_type(load, dest_type);
333
334 if (load->intrinsic != nir_intrinsic_load_uniform) {
335 nir_io_semantics semantics = { 0 };
336 semantics.location = var->data.location;
337 semantics.num_slots = get_number_of_slots(state, var);
338 semantics.fb_fetch_output = var->data.fb_fetch_output;
339 semantics.medium_precision =
340 var->data.precision == GLSL_PRECISION_MEDIUM ||
341 var->data.precision == GLSL_PRECISION_LOW;
342 semantics.high_dvec2 = high_dvec2;
343 nir_intrinsic_set_io_semantics(load, semantics);
344 }
345
346 if (array_index) {
347 load->src[0] = nir_src_for_ssa(array_index);
348 load->src[1] = nir_src_for_ssa(offset);
349 } else if (barycentric) {
350 load->src[0] = nir_src_for_ssa(barycentric);
351 load->src[1] = nir_src_for_ssa(offset);
352 } else {
353 load->src[0] = nir_src_for_ssa(offset);
354 }
355
356 nir_def_init(&load->instr, &load->def, num_components, bit_size);
357 nir_builder_instr_insert(b, &load->instr);
358
359 return &load->def;
360 }
361
362 static nir_def *
363 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
364 nir_def *array_index, nir_variable *var, nir_def *offset,
365 unsigned component, const struct glsl_type *type)
366 {
367    const bool lower_double = !glsl_type_is_integer(type) && (state->options & nir_lower_io_lower_64bit_float_to_32);
368 if (intrin->def.bit_size == 64 &&
369 (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32_new |
370 nir_lower_io_lower_64bit_to_32)))) {
371 nir_builder *b = &state->builder;
372 bool use_high_dvec2_semantic = uses_high_dvec2_semantic(state, var);
373
374 /* Each slot is a dual slot, so divide the offset within the variable
375 * by 2.
376 */
377 if (use_high_dvec2_semantic)
378 offset = nir_ushr_imm(b, offset, 1);
379
380 const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
381
382 nir_def *comp64[4];
383 assert(component == 0 || component == 2);
384 unsigned dest_comp = 0;
385 bool high_dvec2 = false;
386 while (dest_comp < intrin->def.num_components) {
387 const unsigned num_comps =
388 MIN2(intrin->def.num_components - dest_comp,
389 (4 - component) / 2);
390
391 nir_def *data32 =
392 emit_load(state, array_index, var, offset, component,
393 num_comps * 2, 32, nir_type_uint32, high_dvec2);
394 for (unsigned i = 0; i < num_comps; i++) {
395 comp64[dest_comp + i] =
396 nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
397 }
398
399 /* Only the first store has a component offset */
400 component = 0;
401 dest_comp += num_comps;
402
403 if (use_high_dvec2_semantic) {
404 /* Increment the offset when we wrap around the dual slot. */
405 if (high_dvec2)
406 offset = nir_iadd_imm(b, offset, slot_size);
407 high_dvec2 = !high_dvec2;
408 } else {
409 offset = nir_iadd_imm(b, offset, slot_size);
410 }
411 }
412
413 return nir_vec(b, comp64, intrin->def.num_components);
414 } else if (intrin->def.bit_size == 1) {
415 /* Booleans are 32-bit */
416 assert(glsl_type_is_boolean(type));
417 return nir_b2b1(&state->builder,
418 emit_load(state, array_index, var, offset, component,
419 intrin->def.num_components, 32,
420 nir_type_bool32, false));
421 } else {
422 return emit_load(state, array_index, var, offset, component,
423 intrin->def.num_components,
424 intrin->def.bit_size,
425 nir_get_nir_type_for_glsl_type(type), false);
426 }
427 }
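
/* Worked example (illustrative): with one of the 64-bit lowering options
 * set, a load of a dvec3 starting at component 0 is split by the loop above
 * into a 4x32 load (repacked into the first two double components with
 * nir_pack_64_2x32) followed by a 2x32 load one slot further on for the
 * third component, and the pieces are recombined with nir_vec().
 */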
428
429 static void
430 emit_store(struct lower_io_state *state, nir_def *data,
431 nir_def *array_index, nir_variable *var, nir_def *offset,
432 unsigned component, unsigned num_components,
433 nir_component_mask_t write_mask, nir_alu_type src_type)
434 {
435 nir_builder *b = &state->builder;
436
437 assert(var->data.mode == nir_var_shader_out);
438 nir_intrinsic_op op =
439 !array_index ? nir_intrinsic_store_output : var->data.per_primitive ? nir_intrinsic_store_per_primitive_output
440 : nir_intrinsic_store_per_vertex_output;
441
442 nir_intrinsic_instr *store =
443 nir_intrinsic_instr_create(state->builder.shader, op);
444 store->num_components = num_components;
445
446 store->src[0] = nir_src_for_ssa(data);
447
448 const struct glsl_type *type = var->type;
449 if (array_index)
450 type = glsl_get_array_element(type);
451 unsigned var_size = state->type_size(type, var->data.bindless);
452 nir_intrinsic_set_base(store, var->data.driver_location);
453 nir_intrinsic_set_range(store, var_size);
454 nir_intrinsic_set_component(store, component);
455 nir_intrinsic_set_src_type(store, src_type);
456
457 nir_intrinsic_set_write_mask(store, write_mask);
458
459 if (nir_intrinsic_has_access(store))
460 nir_intrinsic_set_access(store, var->data.access);
461
462 if (array_index)
463 store->src[1] = nir_src_for_ssa(array_index);
464
465 store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);
466
467 unsigned gs_streams = 0;
468 if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
469 if (var->data.stream & NIR_STREAM_PACKED) {
470 gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
471 } else {
472 assert(var->data.stream < 4);
473 gs_streams = 0;
474 for (unsigned i = 0; i < num_components; ++i)
475 gs_streams |= var->data.stream << (2 * i);
476 }
477 }
478
479 nir_io_semantics semantics = { 0 };
480 semantics.location = var->data.location;
481 semantics.num_slots = get_number_of_slots(state, var);
482 semantics.dual_source_blend_index = var->data.index;
483 semantics.gs_streams = gs_streams;
484 semantics.medium_precision =
485 var->data.precision == GLSL_PRECISION_MEDIUM ||
486 var->data.precision == GLSL_PRECISION_LOW;
487 semantics.per_view = var->data.per_view;
488 semantics.invariant = var->data.invariant;
489
490 nir_intrinsic_set_io_semantics(store, semantics);
491
492 nir_builder_instr_insert(b, &store->instr);
493 }
494
495 static void
496 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
497 nir_def *array_index, nir_variable *var, nir_def *offset,
498 unsigned component, const struct glsl_type *type)
499 {
500    const bool lower_double = !glsl_type_is_integer(type) && (state->options & nir_lower_io_lower_64bit_float_to_32);
501 if (intrin->src[1].ssa->bit_size == 64 &&
502 (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32 |
503 nir_lower_io_lower_64bit_to_32_new)))) {
504 nir_builder *b = &state->builder;
505
506 const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
507
508 assert(component == 0 || component == 2);
509 unsigned src_comp = 0;
510 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
511 while (src_comp < intrin->num_components) {
512 const unsigned num_comps =
513 MIN2(intrin->num_components - src_comp,
514 (4 - component) / 2);
515
516 if (write_mask & BITFIELD_MASK(num_comps)) {
517 nir_def *data =
518 nir_channels(b, intrin->src[1].ssa,
519 BITFIELD_RANGE(src_comp, num_comps));
520 nir_def *data32 = nir_bitcast_vector(b, data, 32);
521
522 uint32_t write_mask32 = 0;
523 for (unsigned i = 0; i < num_comps; i++) {
524 if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
525 write_mask32 |= 3 << (i * 2);
526 }
527
528 emit_store(state, data32, array_index, var, offset,
529 component, data32->num_components, write_mask32,
530 nir_type_uint32);
531 }
532
533 /* Only the first store has a component offset */
534 component = 0;
535 src_comp += num_comps;
536 write_mask >>= num_comps;
537 offset = nir_iadd_imm(b, offset, slot_size);
538 }
539    } else if (intrin->src[1].ssa->bit_size == 1) {
540 /* Booleans are 32-bit */
541 assert(glsl_type_is_boolean(type));
542 nir_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
543 emit_store(state, b32_val, array_index, var, offset,
544 component, intrin->num_components,
545 nir_intrinsic_write_mask(intrin),
546 nir_type_bool32);
547 } else {
548 emit_store(state, intrin->src[1].ssa, array_index, var, offset,
549 component, intrin->num_components,
550 nir_intrinsic_write_mask(intrin),
551 nir_get_nir_type_for_glsl_type(type));
552 }
553 }
554
555 static nir_def *
556 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
557 nir_variable *var, nir_def *offset, unsigned component,
558 const struct glsl_type *type)
559 {
560 nir_builder *b = &state->builder;
561 assert(var->data.mode == nir_var_shader_in);
562
563 /* Ignore interpolateAt() for flat variables - flat is flat. Lower
564 * interpolateAtVertex() for explicit variables.
565 */
566 if (var->data.interpolation == INTERP_MODE_FLAT ||
567 var->data.interpolation == INTERP_MODE_EXPLICIT) {
568 nir_def *vertex_index = NULL;
569
570 if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
571 assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
572 vertex_index = intrin->src[1].ssa;
573 }
574
575 return lower_load(intrin, state, vertex_index, var, offset, component, type);
576 }
577
578 /* None of the supported APIs allow interpolation on 64-bit things */
579 assert(intrin->def.bit_size <= 32);
580
581 nir_intrinsic_op bary_op;
582 switch (intrin->intrinsic) {
583 case nir_intrinsic_interp_deref_at_centroid:
584 bary_op = nir_intrinsic_load_barycentric_centroid;
585 break;
586 case nir_intrinsic_interp_deref_at_sample:
587 bary_op = nir_intrinsic_load_barycentric_at_sample;
588 break;
589 case nir_intrinsic_interp_deref_at_offset:
590 bary_op = nir_intrinsic_load_barycentric_at_offset;
591 break;
592 default:
593 unreachable("Bogus interpolateAt() intrinsic.");
594 }
595
596 nir_intrinsic_instr *bary_setup =
597 nir_intrinsic_instr_create(state->builder.shader, bary_op);
598
599 nir_def_init(&bary_setup->instr, &bary_setup->def, 2, 32);
600 nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
601
602 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
603 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
604 intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
605 bary_setup->src[0] = nir_src_for_ssa(intrin->src[1].ssa);
606
607 nir_builder_instr_insert(b, &bary_setup->instr);
608
609 nir_io_semantics semantics = { 0 };
610 semantics.location = var->data.location;
611 semantics.num_slots = get_number_of_slots(state, var);
612 semantics.medium_precision =
613 var->data.precision == GLSL_PRECISION_MEDIUM ||
614 var->data.precision == GLSL_PRECISION_LOW;
615
616 nir_def *load =
617 nir_load_interpolated_input(&state->builder,
618 intrin->def.num_components,
619 intrin->def.bit_size,
620 &bary_setup->def,
621 offset,
622 .base = var->data.driver_location,
623 .component = component,
624 .io_semantics = semantics,
625 .dest_type = nir_type_float | intrin->def.bit_size);
626
627 return load;
628 }
629
630 static bool
631 nir_lower_io_block(nir_block *block,
632 struct lower_io_state *state)
633 {
634 nir_builder *b = &state->builder;
635 const nir_shader_compiler_options *options = b->shader->options;
636 bool progress = false;
637
638 nir_foreach_instr_safe(instr, block) {
639 if (instr->type != nir_instr_type_intrinsic)
640 continue;
641
642 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
643
644 switch (intrin->intrinsic) {
645 case nir_intrinsic_load_deref:
646 case nir_intrinsic_store_deref:
647          /* We can lower the io for this nir intrinsic */
648 break;
649 case nir_intrinsic_interp_deref_at_centroid:
650 case nir_intrinsic_interp_deref_at_sample:
651 case nir_intrinsic_interp_deref_at_offset:
652 case nir_intrinsic_interp_deref_at_vertex:
653 /* We can optionally lower these to load_interpolated_input */
654 if (options->use_interpolated_input_intrinsics ||
655 options->lower_interpolate_at)
656 break;
657 FALLTHROUGH;
658 default:
659          /* We can't lower the io for this nir intrinsic, so skip it */
660 continue;
661 }
662
663 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
664 if (!nir_deref_mode_is_one_of(deref, state->modes))
665 continue;
666
667 nir_variable *var = nir_deref_instr_get_variable(deref);
668
669 b->cursor = nir_before_instr(instr);
670
671 const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);
672
673 nir_def *offset;
674 nir_def *array_index = NULL;
675 unsigned component_offset = var->data.location_frac;
676 bool bindless_type_size = var->data.mode == nir_var_shader_in ||
677 var->data.mode == nir_var_shader_out ||
678 var->data.bindless;
679
680 if (nir_deref_instr_is_known_out_of_bounds(deref)) {
681 /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
682 *
683 * In the subsections described above for array, vector, matrix and
684 * structure accesses, any out-of-bounds access produced undefined
685 * behavior....
686 * Out-of-bounds reads return undefined values, which
687 * include values from other variables of the active program or zero.
688 * Out-of-bounds writes may be discarded or overwrite
689 * other variables of the active program.
690 *
691 * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
692 * for reads.
693 *
694 * Otherwise get_io_offset would return out-of-bound offset which may
695 * result in out-of-bound loading/storing of inputs/outputs,
696 * that could cause issues in drivers down the line.
697 */
698 if (intrin->intrinsic != nir_intrinsic_store_deref) {
699 nir_def *zero =
700 nir_imm_zero(b, intrin->def.num_components,
701 intrin->def.bit_size);
702 nir_def_rewrite_uses(&intrin->def,
703 zero);
704 }
705
706 nir_instr_remove(&intrin->instr);
707 progress = true;
708 continue;
709 }
710
711 offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
712 state->type_size, &component_offset,
713 bindless_type_size);
714
715 nir_def *replacement = NULL;
716
717 switch (intrin->intrinsic) {
718 case nir_intrinsic_load_deref:
719 replacement = lower_load(intrin, state, array_index, var, offset,
720 component_offset, deref->type);
721 break;
722
723 case nir_intrinsic_store_deref:
724 lower_store(intrin, state, array_index, var, offset,
725 component_offset, deref->type);
726 break;
727
728 case nir_intrinsic_interp_deref_at_centroid:
729 case nir_intrinsic_interp_deref_at_sample:
730 case nir_intrinsic_interp_deref_at_offset:
731 case nir_intrinsic_interp_deref_at_vertex:
732 assert(array_index == NULL);
733 replacement = lower_interpolate_at(intrin, state, var, offset,
734 component_offset, deref->type);
735 break;
736
737 default:
738 continue;
739 }
740
741 if (replacement) {
742 nir_def_rewrite_uses(&intrin->def,
743 replacement);
744 }
745 nir_instr_remove(&intrin->instr);
746 progress = true;
747 }
748
749 return progress;
750 }
751
752 static bool
753 nir_lower_io_impl(nir_function_impl *impl,
754 nir_variable_mode modes,
755 int (*type_size)(const struct glsl_type *, bool),
756 nir_lower_io_options options)
757 {
758 struct lower_io_state state;
759 bool progress = false;
760
761 state.builder = nir_builder_create(impl);
762 state.dead_ctx = ralloc_context(NULL);
763 state.modes = modes;
764 state.type_size = type_size;
765 state.options = options;
766
767 ASSERTED nir_variable_mode supported_modes =
768 nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
769 assert(!(modes & ~supported_modes));
770
771 nir_foreach_block(block, impl) {
772 progress |= nir_lower_io_block(block, &state);
773 }
774
775 ralloc_free(state.dead_ctx);
776
777 nir_metadata_preserve(impl, nir_metadata_none);
778
779 return progress;
780 }
781
782 /** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
783 *
784 * This pass is intended to be used for cross-stage shader I/O and driver-
785 * managed uniforms to turn deref-based access into a simpler model using
786 * locations or offsets. For fragment shader inputs, it can optionally turn
787 * load_deref into an explicit interpolation using barycentrics coming from
788 * one of the load_barycentric_* intrinsics. This pass requires that all
789 * deref chains are complete and contain no casts.
790 */
791 bool
792 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
793 int (*type_size)(const struct glsl_type *, bool),
794 nir_lower_io_options options)
795 {
796 bool progress = false;
797
798 nir_foreach_function_impl(impl, shader) {
799 progress |= nir_lower_io_impl(impl, modes, type_size, options);
800 }
801
802 return progress;
803 }
804
805 static unsigned
806 type_scalar_size_bytes(const struct glsl_type *type)
807 {
808 assert(glsl_type_is_vector_or_scalar(type) ||
809 glsl_type_is_matrix(type));
810 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
811 }
812
813 nir_def *
814 nir_build_addr_iadd(nir_builder *b, nir_def *addr,
815 nir_address_format addr_format,
816 nir_variable_mode modes,
817 nir_def *offset)
818 {
819 assert(offset->num_components == 1);
820
821 switch (addr_format) {
822 case nir_address_format_32bit_global:
823 case nir_address_format_64bit_global:
824 case nir_address_format_32bit_offset:
825 assert(addr->bit_size == offset->bit_size);
826 assert(addr->num_components == 1);
827 return nir_iadd(b, addr, offset);
828
829 case nir_address_format_2x32bit_global: {
830 assert(addr->num_components == 2);
831 nir_def *lo = nir_channel(b, addr, 0);
832 nir_def *hi = nir_channel(b, addr, 1);
833 nir_def *res_lo = nir_iadd(b, lo, offset);
834 nir_def *carry = nir_b2i32(b, nir_ult(b, res_lo, lo));
835 nir_def *res_hi = nir_iadd(b, hi, carry);
836 return nir_vec2(b, res_lo, res_hi);
837 }
838
839 case nir_address_format_32bit_offset_as_64bit:
840 assert(addr->num_components == 1);
841 assert(offset->bit_size == 32);
842 return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));
843
844 case nir_address_format_64bit_global_32bit_offset:
845 case nir_address_format_64bit_bounded_global:
846 assert(addr->num_components == 4);
847 assert(addr->bit_size == offset->bit_size);
848 return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);
849
850 case nir_address_format_32bit_index_offset:
851 assert(addr->num_components == 2);
852 assert(addr->bit_size == offset->bit_size);
853 return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);
854
855 case nir_address_format_32bit_index_offset_pack64:
856 assert(addr->num_components == 1);
857 assert(offset->bit_size == 32);
858 return nir_pack_64_2x32_split(b,
859 nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
860 nir_unpack_64_2x32_split_y(b, addr));
861
862 case nir_address_format_vec2_index_32bit_offset:
863 assert(addr->num_components == 3);
864 assert(offset->bit_size == 32);
865 return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);
866
867 case nir_address_format_62bit_generic:
868 assert(addr->num_components == 1);
869 assert(addr->bit_size == 64);
870 assert(offset->bit_size == 64);
871 if (!(modes & ~(nir_var_function_temp |
872 nir_var_shader_temp |
873 nir_var_mem_shared))) {
874 /* If we're sure it's one of these modes, we can do an easy 32-bit
875 * addition and don't need to bother with 64-bit math.
876 */
877 nir_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
878 nir_def *type = nir_unpack_64_2x32_split_y(b, addr);
879 addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
880 return nir_pack_64_2x32_split(b, addr32, type);
881 } else {
882 return nir_iadd(b, addr, offset);
883 }
884
885 case nir_address_format_logical:
886 unreachable("Unsupported address format");
887 }
888 unreachable("Invalid address format");
889 }
890
891 static unsigned
892 addr_get_offset_bit_size(nir_def *addr, nir_address_format addr_format)
893 {
894 if (addr_format == nir_address_format_32bit_offset_as_64bit ||
895 addr_format == nir_address_format_32bit_index_offset_pack64)
896 return 32;
897 return addr->bit_size;
898 }
899
900 nir_def *
901 nir_build_addr_iadd_imm(nir_builder *b, nir_def *addr,
902 nir_address_format addr_format,
903 nir_variable_mode modes,
904 int64_t offset)
905 {
906 if (!offset)
907 return addr;
908
909 return nir_build_addr_iadd(
910 b, addr, addr_format, modes,
911 nir_imm_intN_t(b, offset,
912 addr_get_offset_bit_size(addr, addr_format)));
913 }
914
915 static nir_def *
916 build_addr_for_var(nir_builder *b, nir_variable *var,
917 nir_address_format addr_format)
918 {
919 assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
920 nir_var_mem_task_payload |
921 nir_var_mem_global |
922 nir_var_shader_temp | nir_var_function_temp |
923 nir_var_mem_push_const | nir_var_mem_constant));
924
925 const unsigned num_comps = nir_address_format_num_components(addr_format);
926 const unsigned bit_size = nir_address_format_bit_size(addr_format);
927
928 switch (addr_format) {
929 case nir_address_format_2x32bit_global:
930 case nir_address_format_32bit_global:
931 case nir_address_format_64bit_global: {
932 nir_def *base_addr;
933 switch (var->data.mode) {
934 case nir_var_shader_temp:
935 base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
936 break;
937
938 case nir_var_function_temp:
939 base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
940 break;
941
942 case nir_var_mem_constant:
943 base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
944 break;
945
946 case nir_var_mem_shared:
947 base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
948 break;
949
950 case nir_var_mem_global:
951 base_addr = nir_load_global_base_ptr(b, num_comps, bit_size);
952 break;
953
954 default:
955 unreachable("Unsupported variable mode");
956 }
957
958 return nir_build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
959 var->data.driver_location);
960 }
961
962 case nir_address_format_32bit_offset:
963 assert(var->data.driver_location <= UINT32_MAX);
964 return nir_imm_int(b, var->data.driver_location);
965
966 case nir_address_format_32bit_offset_as_64bit:
967 assert(var->data.driver_location <= UINT32_MAX);
968 return nir_imm_int64(b, var->data.driver_location);
969
970 case nir_address_format_62bit_generic:
971 switch (var->data.mode) {
972 case nir_var_shader_temp:
973 case nir_var_function_temp:
974 assert(var->data.driver_location <= UINT32_MAX);
975 return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);
976
977 case nir_var_mem_shared:
978 assert(var->data.driver_location <= UINT32_MAX);
979 return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);
980
981 case nir_var_mem_global:
982 return nir_iadd_imm(b, nir_load_global_base_ptr(b, num_comps, bit_size),
983 var->data.driver_location);
984
985 default:
986 unreachable("Unsupported variable mode");
987 }
988
989 default:
990 unreachable("Unsupported address format");
991 }
992 }
993
994 static nir_def *
995 build_runtime_addr_mode_check(nir_builder *b, nir_def *addr,
996 nir_address_format addr_format,
997 nir_variable_mode mode)
998 {
999 /* The compile-time check failed; do a run-time check */
1000 switch (addr_format) {
1001 case nir_address_format_62bit_generic: {
1002 assert(addr->num_components == 1);
1003 assert(addr->bit_size == 64);
1004 nir_def *mode_enum = nir_ushr_imm(b, addr, 62);
1005 switch (mode) {
1006 case nir_var_function_temp:
1007 case nir_var_shader_temp:
1008 return nir_ieq_imm(b, mode_enum, 0x2);
1009
1010 case nir_var_mem_shared:
1011 return nir_ieq_imm(b, mode_enum, 0x1);
1012
1013 case nir_var_mem_global:
1014 return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
1015 nir_ieq_imm(b, mode_enum, 0x3));
1016
1017 default:
1018 unreachable("Invalid mode check intrinsic");
1019 }
1020 }
1021
1022 default:
1023 unreachable("Unsupported address mode");
1024 }
1025 }
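
/* For nir_address_format_62bit_generic, the top two bits of the 64-bit
 * pointer encode the mode: 0x2 is shader/function temporaries, 0x1 is
 * shared memory, and 0x0 or 0x3 is global memory.  This matches the tags
 * ORed into the address by build_addr_for_var() above.
 */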
1026
1027 unsigned
1028 nir_address_format_bit_size(nir_address_format addr_format)
1029 {
1030 switch (addr_format) {
1031 case nir_address_format_32bit_global:
1032 return 32;
1033 case nir_address_format_2x32bit_global:
1034 return 32;
1035 case nir_address_format_64bit_global:
1036 return 64;
1037 case nir_address_format_64bit_global_32bit_offset:
1038 return 32;
1039 case nir_address_format_64bit_bounded_global:
1040 return 32;
1041 case nir_address_format_32bit_index_offset:
1042 return 32;
1043 case nir_address_format_32bit_index_offset_pack64:
1044 return 64;
1045 case nir_address_format_vec2_index_32bit_offset:
1046 return 32;
1047 case nir_address_format_62bit_generic:
1048 return 64;
1049 case nir_address_format_32bit_offset:
1050 return 32;
1051 case nir_address_format_32bit_offset_as_64bit:
1052 return 64;
1053 case nir_address_format_logical:
1054 return 32;
1055 }
1056 unreachable("Invalid address format");
1057 }
1058
1059 unsigned
1060 nir_address_format_num_components(nir_address_format addr_format)
1061 {
1062 switch (addr_format) {
1063 case nir_address_format_32bit_global:
1064 return 1;
1065 case nir_address_format_2x32bit_global:
1066 return 2;
1067 case nir_address_format_64bit_global:
1068 return 1;
1069 case nir_address_format_64bit_global_32bit_offset:
1070 return 4;
1071 case nir_address_format_64bit_bounded_global:
1072 return 4;
1073 case nir_address_format_32bit_index_offset:
1074 return 2;
1075 case nir_address_format_32bit_index_offset_pack64:
1076 return 1;
1077 case nir_address_format_vec2_index_32bit_offset:
1078 return 3;
1079 case nir_address_format_62bit_generic:
1080 return 1;
1081 case nir_address_format_32bit_offset:
1082 return 1;
1083 case nir_address_format_32bit_offset_as_64bit:
1084 return 1;
1085 case nir_address_format_logical:
1086 return 1;
1087 }
1088 unreachable("Invalid address format");
1089 }
1090
1091 static nir_def *
1092 addr_to_index(nir_builder *b, nir_def *addr,
1093 nir_address_format addr_format)
1094 {
1095 switch (addr_format) {
1096 case nir_address_format_32bit_index_offset:
1097 assert(addr->num_components == 2);
1098 return nir_channel(b, addr, 0);
1099 case nir_address_format_32bit_index_offset_pack64:
1100 return nir_unpack_64_2x32_split_y(b, addr);
1101 case nir_address_format_vec2_index_32bit_offset:
1102 assert(addr->num_components == 3);
1103 return nir_trim_vector(b, addr, 2);
1104 default:
1105 unreachable("Invalid address format");
1106 }
1107 }
1108
1109 static nir_def *
1110 addr_to_offset(nir_builder *b, nir_def *addr,
1111 nir_address_format addr_format)
1112 {
1113 switch (addr_format) {
1114 case nir_address_format_32bit_index_offset:
1115 assert(addr->num_components == 2);
1116 return nir_channel(b, addr, 1);
1117 case nir_address_format_32bit_index_offset_pack64:
1118 return nir_unpack_64_2x32_split_x(b, addr);
1119 case nir_address_format_vec2_index_32bit_offset:
1120 assert(addr->num_components == 3);
1121 return nir_channel(b, addr, 2);
1122 case nir_address_format_32bit_offset:
1123 return addr;
1124 case nir_address_format_32bit_offset_as_64bit:
1125 case nir_address_format_62bit_generic:
1126 return nir_u2u32(b, addr);
1127 default:
1128 unreachable("Invalid address format");
1129 }
1130 }
1131
1132 /** Returns true if the given address format resolves to a global address */
1133 static bool
1134 addr_format_is_global(nir_address_format addr_format,
1135 nir_variable_mode mode)
1136 {
1137 if (addr_format == nir_address_format_62bit_generic)
1138 return mode == nir_var_mem_global;
1139
1140 return addr_format == nir_address_format_32bit_global ||
1141 addr_format == nir_address_format_2x32bit_global ||
1142 addr_format == nir_address_format_64bit_global ||
1143 addr_format == nir_address_format_64bit_global_32bit_offset ||
1144 addr_format == nir_address_format_64bit_bounded_global;
1145 }
1146
1147 static bool
1148 addr_format_is_offset(nir_address_format addr_format,
1149 nir_variable_mode mode)
1150 {
1151 if (addr_format == nir_address_format_62bit_generic)
1152 return mode != nir_var_mem_global;
1153
1154 return addr_format == nir_address_format_32bit_offset ||
1155 addr_format == nir_address_format_32bit_offset_as_64bit;
1156 }
1157
1158 static nir_def *
1159 addr_to_global(nir_builder *b, nir_def *addr,
1160 nir_address_format addr_format)
1161 {
1162 switch (addr_format) {
1163 case nir_address_format_32bit_global:
1164 case nir_address_format_64bit_global:
1165 case nir_address_format_62bit_generic:
1166 assert(addr->num_components == 1);
1167 return addr;
1168
1169 case nir_address_format_2x32bit_global:
1170 assert(addr->num_components == 2);
1171 return addr;
1172
1173 case nir_address_format_64bit_global_32bit_offset:
1174 case nir_address_format_64bit_bounded_global:
1175 assert(addr->num_components == 4);
1176 return nir_iadd(b, nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)),
1177 nir_u2u64(b, nir_channel(b, addr, 3)));
1178
1179 case nir_address_format_32bit_index_offset:
1180 case nir_address_format_32bit_index_offset_pack64:
1181 case nir_address_format_vec2_index_32bit_offset:
1182 case nir_address_format_32bit_offset:
1183 case nir_address_format_32bit_offset_as_64bit:
1184 case nir_address_format_logical:
1185 unreachable("Cannot get a 64-bit address with this address format");
1186 }
1187
1188 unreachable("Invalid address format");
1189 }
1190
1191 static bool
1192 addr_format_needs_bounds_check(nir_address_format addr_format)
1193 {
1194 return addr_format == nir_address_format_64bit_bounded_global;
1195 }
1196
1197 static nir_def *
1198 addr_is_in_bounds(nir_builder *b, nir_def *addr,
1199 nir_address_format addr_format, unsigned size)
1200 {
1201 assert(addr_format == nir_address_format_64bit_bounded_global);
1202 assert(addr->num_components == 4);
1203 assert(size > 0);
1204 return nir_ult(b, nir_iadd_imm(b, nir_channel(b, addr, 3), size - 1),
1205 nir_channel(b, addr, 2));
1206 }
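
/* As used above, the vec4 global address formats keep the 64-bit base
 * address in components 0-1 (low/high 32 bits) and the 32-bit offset in
 * component 3; for nir_address_format_64bit_bounded_global, component 2
 * holds the accessible size, which addr_is_in_bounds() compares against
 * offset + access size.
 */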
1207
1208 static void
1209 nir_get_explicit_deref_range(nir_deref_instr *deref,
1210 nir_address_format addr_format,
1211 uint32_t *out_base,
1212 uint32_t *out_range)
1213 {
1214 uint32_t base = 0;
1215 uint32_t range = glsl_get_explicit_size(deref->type, false);
1216
1217 while (true) {
1218 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1219
1220 switch (deref->deref_type) {
1221 case nir_deref_type_array:
1222 case nir_deref_type_array_wildcard:
1223 case nir_deref_type_ptr_as_array: {
1224 const unsigned stride = nir_deref_instr_array_stride(deref);
1225 if (stride == 0)
1226 goto fail;
1227
1228 if (!parent)
1229 goto fail;
1230
1231 if (deref->deref_type != nir_deref_type_array_wildcard &&
1232 nir_src_is_const(deref->arr.index)) {
1233 base += stride * nir_src_as_uint(deref->arr.index);
1234 } else {
1235 if (glsl_get_length(parent->type) == 0)
1236 goto fail;
1237 range += stride * (glsl_get_length(parent->type) - 1);
1238 }
1239 break;
1240 }
1241
1242 case nir_deref_type_struct: {
1243 if (!parent)
1244 goto fail;
1245
1246 base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
1247 break;
1248 }
1249
1250 case nir_deref_type_cast: {
1251 nir_instr *parent_instr = deref->parent.ssa->parent_instr;
1252
1253 switch (parent_instr->type) {
1254 case nir_instr_type_load_const: {
1255 nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);
1256
1257 switch (addr_format) {
1258 case nir_address_format_32bit_offset:
1259 base += load->value[1].u32;
1260 break;
1261 case nir_address_format_32bit_index_offset:
1262 base += load->value[1].u32;
1263 break;
1264 case nir_address_format_vec2_index_32bit_offset:
1265 base += load->value[2].u32;
1266 break;
1267 default:
1268 goto fail;
1269 }
1270
1271 *out_base = base;
1272 *out_range = range;
1273 return;
1274 }
1275
1276 case nir_instr_type_intrinsic: {
1277 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
1278 switch (intr->intrinsic) {
1279 case nir_intrinsic_load_vulkan_descriptor:
1280 /* Assume that a load_vulkan_descriptor won't contribute to an
1281 * offset within the resource.
1282 */
1283 break;
1284 default:
1285 goto fail;
1286 }
1287
1288 *out_base = base;
1289 *out_range = range;
1290 return;
1291 }
1292
1293 default:
1294 goto fail;
1295 }
1296 }
1297
1298 default:
1299 goto fail;
1300 }
1301
1302 deref = parent;
1303 }
1304
1305 fail:
1306 *out_base = 0;
1307 *out_range = ~0;
1308 }
1309
1310 static nir_variable_mode
1311 canonicalize_generic_modes(nir_variable_mode modes)
1312 {
1313 assert(modes != 0);
1314 if (util_bitcount(modes) == 1)
1315 return modes;
1316
1317 assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
1318 nir_var_mem_shared | nir_var_mem_global)));
1319
1320 /* Canonicalize by converting shader_temp to function_temp */
1321 if (modes & nir_var_shader_temp) {
1322 modes &= ~nir_var_shader_temp;
1323 modes |= nir_var_function_temp;
1324 }
1325
1326 return modes;
1327 }
1328
1329 static nir_intrinsic_op
1330 get_store_global_op_from_addr_format(nir_address_format addr_format)
1331 {
1332 if (addr_format != nir_address_format_2x32bit_global)
1333 return nir_intrinsic_store_global;
1334 else
1335 return nir_intrinsic_store_global_2x32;
1336 }
1337
1338 static nir_intrinsic_op
1339 get_load_global_op_from_addr_format(nir_address_format addr_format)
1340 {
1341 if (addr_format != nir_address_format_2x32bit_global)
1342 return nir_intrinsic_load_global;
1343 else
1344 return nir_intrinsic_load_global_2x32;
1345 }
1346
1347 static nir_intrinsic_op
1348 get_load_global_constant_op_from_addr_format(nir_address_format addr_format)
1349 {
1350 if (addr_format != nir_address_format_2x32bit_global)
1351 return nir_intrinsic_load_global_constant;
1352 else
1353 return nir_intrinsic_load_global_2x32; /* no dedicated op, fallback */
1354 }
1355
1356 static nir_def *
1357 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
1358 nir_def *addr, nir_address_format addr_format,
1359 nir_variable_mode modes,
1360 uint32_t align_mul, uint32_t align_offset,
1361 unsigned num_components)
1362 {
1363 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1364 modes = canonicalize_generic_modes(modes);
1365
1366 if (util_bitcount(modes) > 1) {
1367 if (addr_format_is_global(addr_format, modes)) {
1368 return build_explicit_io_load(b, intrin, addr, addr_format,
1369 nir_var_mem_global,
1370 align_mul, align_offset,
1371 num_components);
1372 } else if (modes & nir_var_function_temp) {
1373 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1374 nir_var_function_temp));
1375 nir_def *res1 =
1376 build_explicit_io_load(b, intrin, addr, addr_format,
1377 nir_var_function_temp,
1378 align_mul, align_offset,
1379 num_components);
1380 nir_push_else(b, NULL);
1381 nir_def *res2 =
1382 build_explicit_io_load(b, intrin, addr, addr_format,
1383 modes & ~nir_var_function_temp,
1384 align_mul, align_offset,
1385 num_components);
1386 nir_pop_if(b, NULL);
1387 return nir_if_phi(b, res1, res2);
1388 } else {
1389 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1390 nir_var_mem_shared));
1391 assert(modes & nir_var_mem_shared);
1392 nir_def *res1 =
1393 build_explicit_io_load(b, intrin, addr, addr_format,
1394 nir_var_mem_shared,
1395 align_mul, align_offset,
1396 num_components);
1397 nir_push_else(b, NULL);
1398 assert(modes & nir_var_mem_global);
1399 nir_def *res2 =
1400 build_explicit_io_load(b, intrin, addr, addr_format,
1401 nir_var_mem_global,
1402 align_mul, align_offset,
1403 num_components);
1404 nir_pop_if(b, NULL);
1405 return nir_if_phi(b, res1, res2);
1406 }
1407 }
1408
1409 assert(util_bitcount(modes) == 1);
1410 const nir_variable_mode mode = modes;
1411
1412 nir_intrinsic_op op;
1413 switch (intrin->intrinsic) {
1414 case nir_intrinsic_load_deref:
1415 switch (mode) {
1416 case nir_var_mem_ubo:
1417 if (addr_format == nir_address_format_64bit_global_32bit_offset)
1418 op = nir_intrinsic_load_global_constant_offset;
1419 else if (addr_format == nir_address_format_64bit_bounded_global)
1420 op = nir_intrinsic_load_global_constant_bounded;
1421 else if (addr_format_is_global(addr_format, mode))
1422 op = nir_intrinsic_load_global_constant;
1423 else
1424 op = nir_intrinsic_load_ubo;
1425 break;
1426 case nir_var_mem_ssbo:
1427 if (addr_format_is_global(addr_format, mode))
1428 op = nir_intrinsic_load_global;
1429 else
1430 op = nir_intrinsic_load_ssbo;
1431 break;
1432 case nir_var_mem_global:
1433 assert(addr_format_is_global(addr_format, mode));
1434 op = get_load_global_op_from_addr_format(addr_format);
1435 break;
1436 case nir_var_uniform:
1437 assert(addr_format_is_offset(addr_format, mode));
1438 assert(b->shader->info.stage == MESA_SHADER_KERNEL);
1439 op = nir_intrinsic_load_kernel_input;
1440 break;
1441 case nir_var_mem_shared:
1442 assert(addr_format_is_offset(addr_format, mode));
1443 op = nir_intrinsic_load_shared;
1444 break;
1445 case nir_var_mem_task_payload:
1446 assert(addr_format_is_offset(addr_format, mode));
1447 op = nir_intrinsic_load_task_payload;
1448 break;
1449 case nir_var_shader_temp:
1450 case nir_var_function_temp:
1451 if (addr_format_is_offset(addr_format, mode)) {
1452 op = nir_intrinsic_load_scratch;
1453 } else {
1454 assert(addr_format_is_global(addr_format, mode));
1455 op = get_load_global_op_from_addr_format(addr_format);
1456 }
1457 break;
1458 case nir_var_mem_push_const:
1459 assert(addr_format == nir_address_format_32bit_offset);
1460 op = nir_intrinsic_load_push_constant;
1461 break;
1462 case nir_var_mem_constant:
1463 if (addr_format_is_offset(addr_format, mode)) {
1464 op = nir_intrinsic_load_constant;
1465 } else {
1466 assert(addr_format_is_global(addr_format, mode));
1467 op = get_load_global_constant_op_from_addr_format(addr_format);
1468 }
1469 break;
1470 default:
1471 unreachable("Unsupported explicit IO variable mode");
1472 }
1473 break;
1474
1475 case nir_intrinsic_load_deref_block_intel:
1476 switch (mode) {
1477 case nir_var_mem_ssbo:
1478 if (addr_format_is_global(addr_format, mode))
1479 op = nir_intrinsic_load_global_block_intel;
1480 else
1481 op = nir_intrinsic_load_ssbo_block_intel;
1482 break;
1483 case nir_var_mem_global:
1484 op = nir_intrinsic_load_global_block_intel;
1485 break;
1486 case nir_var_mem_shared:
1487 op = nir_intrinsic_load_shared_block_intel;
1488 break;
1489 default:
1490 unreachable("Unsupported explicit IO variable mode");
1491 }
1492 break;
1493
1494 default:
1495 unreachable("Invalid intrinsic");
1496 }
1497
1498 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
1499
1500 if (op == nir_intrinsic_load_global_constant_offset) {
1501 assert(addr_format == nir_address_format_64bit_global_32bit_offset);
1502 load->src[0] = nir_src_for_ssa(
1503 nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
1504 load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1505 } else if (op == nir_intrinsic_load_global_constant_bounded) {
1506 assert(addr_format == nir_address_format_64bit_bounded_global);
1507 load->src[0] = nir_src_for_ssa(
1508 nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
1509 load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1510 load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
1511 } else if (addr_format_is_global(addr_format, mode)) {
1512 load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1513 } else if (addr_format_is_offset(addr_format, mode)) {
1514 assert(addr->num_components == 1);
1515 load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1516 } else {
1517 load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1518 load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1519 }
1520
1521 if (nir_intrinsic_has_access(load))
1522 nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
1523
1524 if (op == nir_intrinsic_load_constant) {
1525 nir_intrinsic_set_base(load, 0);
1526 nir_intrinsic_set_range(load, b->shader->constant_data_size);
1527 } else if (op == nir_intrinsic_load_kernel_input) {
1528 nir_intrinsic_set_base(load, 0);
1529 nir_intrinsic_set_range(load, b->shader->num_uniforms);
1530 } else if (mode == nir_var_mem_push_const) {
1531       /* Push constants are required to be chaseable back to the
1532        * variable so that we can provide a base/range.
1533 */
1534 nir_variable *var = nir_deref_instr_get_variable(deref);
1535 nir_intrinsic_set_base(load, 0);
1536 nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
1537 }
1538
1539 unsigned bit_size = intrin->def.bit_size;
1540 if (bit_size == 1) {
1541 /* TODO: Make the native bool bit_size an option. */
1542 bit_size = 32;
1543 }
1544
1545 if (nir_intrinsic_has_align(load))
1546 nir_intrinsic_set_align(load, align_mul, align_offset);
1547
1548 if (nir_intrinsic_has_range_base(load)) {
1549 unsigned base, range;
1550 nir_get_explicit_deref_range(deref, addr_format, &base, &range);
1551 nir_intrinsic_set_range_base(load, base);
1552 nir_intrinsic_set_range(load, range);
1553 }
1554
1555 load->num_components = num_components;
1556 nir_def_init(&load->instr, &load->def, num_components, bit_size);
1557
1558 assert(bit_size % 8 == 0);
1559
1560 nir_def *result;
1561 if (addr_format_needs_bounds_check(addr_format) &&
1562 op != nir_intrinsic_load_global_constant_bounded) {
1563 /* We don't need to bounds-check global_constant_bounded because bounds
1564 * checking is handled by the intrinsic itself.
1565 *
1566 * The Vulkan spec for robustBufferAccess gives us quite a few options
1567 * as to what we can do with an OOB read. Unfortunately, returning
1568 * undefined values isn't one of them so we return an actual zero.
1569 */
1570 nir_def *zero = nir_imm_zero(b, load->num_components, bit_size);
1571
1572 /* TODO: Better handle block_intel. */
1573 assert(load->num_components == 1);
1574 const unsigned load_size = bit_size / 8;
1575 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
1576
1577 nir_builder_instr_insert(b, &load->instr);
1578
1579 nir_pop_if(b, NULL);
1580
1581 result = nir_if_phi(b, &load->def, zero);
1582 } else {
1583 nir_builder_instr_insert(b, &load->instr);
1584 result = &load->def;
1585 }
1586
1587 if (intrin->def.bit_size == 1) {
1588 /* For shared, we can go ahead and use NIR's and/or the back-end's
1589 * standard encoding for booleans rather than forcing a 0/1 boolean.
1590 * This should save an instruction or two.
1591 */
1592 if (mode == nir_var_mem_shared ||
1593 mode == nir_var_shader_temp ||
1594 mode == nir_var_function_temp)
1595 result = nir_b2b1(b, result);
1596 else
1597 result = nir_i2b(b, result);
1598 }
1599
1600 return result;
1601 }
1602
1603 static void
1604 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
1605 nir_def *addr, nir_address_format addr_format,
1606 nir_variable_mode modes,
1607 uint32_t align_mul, uint32_t align_offset,
1608 nir_def *value, nir_component_mask_t write_mask)
1609 {
1610 modes = canonicalize_generic_modes(modes);
1611
1612 if (util_bitcount(modes) > 1) {
1613 if (addr_format_is_global(addr_format, modes)) {
1614 build_explicit_io_store(b, intrin, addr, addr_format,
1615 nir_var_mem_global,
1616 align_mul, align_offset,
1617 value, write_mask);
1618 } else if (modes & nir_var_function_temp) {
1619 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1620 nir_var_function_temp));
1621 build_explicit_io_store(b, intrin, addr, addr_format,
1622 nir_var_function_temp,
1623 align_mul, align_offset,
1624 value, write_mask);
1625 nir_push_else(b, NULL);
1626 build_explicit_io_store(b, intrin, addr, addr_format,
1627 modes & ~nir_var_function_temp,
1628 align_mul, align_offset,
1629 value, write_mask);
1630 nir_pop_if(b, NULL);
1631 } else {
1632 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1633 nir_var_mem_shared));
1634 assert(modes & nir_var_mem_shared);
1635 build_explicit_io_store(b, intrin, addr, addr_format,
1636 nir_var_mem_shared,
1637 align_mul, align_offset,
1638 value, write_mask);
1639 nir_push_else(b, NULL);
1640 assert(modes & nir_var_mem_global);
1641 build_explicit_io_store(b, intrin, addr, addr_format,
1642 nir_var_mem_global,
1643 align_mul, align_offset,
1644 value, write_mask);
1645 nir_pop_if(b, NULL);
1646 }
1647 return;
1648 }
1649
1650 assert(util_bitcount(modes) == 1);
1651 const nir_variable_mode mode = modes;
1652
1653 nir_intrinsic_op op;
1654 switch (intrin->intrinsic) {
1655 case nir_intrinsic_store_deref:
1656 assert(write_mask != 0);
1657
1658 switch (mode) {
1659 case nir_var_mem_ssbo:
1660 if (addr_format_is_global(addr_format, mode))
1661 op = get_store_global_op_from_addr_format(addr_format);
1662 else
1663 op = nir_intrinsic_store_ssbo;
1664 break;
1665 case nir_var_mem_global:
1666 assert(addr_format_is_global(addr_format, mode));
1667 op = get_store_global_op_from_addr_format(addr_format);
1668 break;
1669 case nir_var_mem_shared:
1670 assert(addr_format_is_offset(addr_format, mode));
1671 op = nir_intrinsic_store_shared;
1672 break;
1673 case nir_var_mem_task_payload:
1674 assert(addr_format_is_offset(addr_format, mode));
1675 op = nir_intrinsic_store_task_payload;
1676 break;
1677 case nir_var_shader_temp:
1678 case nir_var_function_temp:
1679 if (addr_format_is_offset(addr_format, mode)) {
1680 op = nir_intrinsic_store_scratch;
1681 } else {
1682 assert(addr_format_is_global(addr_format, mode));
1683 op = get_store_global_op_from_addr_format(addr_format);
1684 }
1685 break;
1686 default:
1687 unreachable("Unsupported explicit IO variable mode");
1688 }
1689 break;
1690
1691 case nir_intrinsic_store_deref_block_intel:
1692 assert(write_mask == 0);
1693
1694 switch (mode) {
1695 case nir_var_mem_ssbo:
1696 if (addr_format_is_global(addr_format, mode))
1697 op = nir_intrinsic_store_global_block_intel;
1698 else
1699 op = nir_intrinsic_store_ssbo_block_intel;
1700 break;
1701 case nir_var_mem_global:
1702 op = nir_intrinsic_store_global_block_intel;
1703 break;
1704 case nir_var_mem_shared:
1705 op = nir_intrinsic_store_shared_block_intel;
1706 break;
1707 default:
1708 unreachable("Unsupported explicit IO variable mode");
1709 }
1710 break;
1711
1712 default:
1713 unreachable("Invalid intrinsic");
1714 }
1715
1716 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
1717
1718 if (value->bit_size == 1) {
1719 /* For shared, we can go ahead and use NIR's and/or the back-end's
1720 * standard encoding for booleans rather than forcing a 0/1 boolean.
1721 * This should save an instruction or two.
1722 *
1723 * TODO: Make the native bool bit_size an option.
1724 */
1725 if (mode == nir_var_mem_shared ||
1726 mode == nir_var_shader_temp ||
1727 mode == nir_var_function_temp)
1728 value = nir_b2b32(b, value);
1729 else
1730 value = nir_b2iN(b, value, 32);
1731 }
1732
1733 store->src[0] = nir_src_for_ssa(value);
1734 if (addr_format_is_global(addr_format, mode)) {
1735 store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1736 } else if (addr_format_is_offset(addr_format, mode)) {
1737 assert(addr->num_components == 1);
1738 store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1739 } else {
1740 store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1741 store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1742 }
1743
1744 nir_intrinsic_set_write_mask(store, write_mask);
1745
1746 if (nir_intrinsic_has_access(store))
1747 nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1748
1749 nir_intrinsic_set_align(store, align_mul, align_offset);
1750
1751 assert(value->num_components == 1 ||
1752 value->num_components == intrin->num_components);
1753 store->num_components = value->num_components;
1754
1755 assert(value->bit_size % 8 == 0);
1756
1757 if (addr_format_needs_bounds_check(addr_format)) {
1758 /* TODO: Better handle block_intel. */
1759 assert(store->num_components == 1);
1760 const unsigned store_size = value->bit_size / 8;
1761 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1762
1763 nir_builder_instr_insert(b, &store->instr);
1764
1765 nir_pop_if(b, NULL);
1766 } else {
1767 nir_builder_instr_insert(b, &store->instr);
1768 }
1769 }
1770
1771 static nir_def *
1772 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1773 nir_def *addr, nir_address_format addr_format,
1774 nir_variable_mode modes)
1775 {
1776 modes = canonicalize_generic_modes(modes);
1777
1778 if (util_bitcount(modes) > 1) {
1779 if (addr_format_is_global(addr_format, modes)) {
1780 return build_explicit_io_atomic(b, intrin, addr, addr_format,
1781 nir_var_mem_global);
1782 } else if (modes & nir_var_function_temp) {
1783 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1784 nir_var_function_temp));
1785 nir_def *res1 =
1786 build_explicit_io_atomic(b, intrin, addr, addr_format,
1787 nir_var_function_temp);
1788 nir_push_else(b, NULL);
1789 nir_def *res2 =
1790 build_explicit_io_atomic(b, intrin, addr, addr_format,
1791 modes & ~nir_var_function_temp);
1792 nir_pop_if(b, NULL);
1793 return nir_if_phi(b, res1, res2);
1794 } else {
1795 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1796 nir_var_mem_shared));
1797 assert(modes & nir_var_mem_shared);
1798 nir_def *res1 =
1799 build_explicit_io_atomic(b, intrin, addr, addr_format,
1800 nir_var_mem_shared);
1801 nir_push_else(b, NULL);
1802 assert(modes & nir_var_mem_global);
1803 nir_def *res2 =
1804 build_explicit_io_atomic(b, intrin, addr, addr_format,
1805 nir_var_mem_global);
1806 nir_pop_if(b, NULL);
1807 return nir_if_phi(b, res1, res2);
1808 }
1809 }
1810
1811 assert(util_bitcount(modes) == 1);
1812 const nir_variable_mode mode = modes;
1813
1814 const unsigned num_data_srcs =
1815 nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1816
1817 nir_intrinsic_op op;
1818 switch (mode) {
1819 case nir_var_mem_ssbo:
1820 if (addr_format_is_global(addr_format, mode))
1821 op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1822 else
1823 op = ssbo_atomic_for_deref(intrin->intrinsic);
1824 break;
1825 case nir_var_mem_global:
1826 assert(addr_format_is_global(addr_format, mode));
1827 op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1828 break;
1829 case nir_var_mem_shared:
1830 assert(addr_format_is_offset(addr_format, mode));
1831 op = shared_atomic_for_deref(intrin->intrinsic);
1832 break;
1833 case nir_var_mem_task_payload:
1834 assert(addr_format_is_offset(addr_format, mode));
1835 op = task_payload_atomic_for_deref(intrin->intrinsic);
1836 break;
1837 default:
1838 unreachable("Unsupported explicit IO variable mode");
1839 }
1840
1841 nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1842 nir_intrinsic_set_atomic_op(atomic, nir_intrinsic_atomic_op(intrin));
1843
1844 unsigned src = 0;
1845 if (addr_format_is_global(addr_format, mode)) {
1846 atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1847 } else if (addr_format_is_offset(addr_format, mode)) {
1848 assert(addr->num_components == 1);
1849 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1850 } else {
1851 atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1852 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1853 }
1854 for (unsigned i = 0; i < num_data_srcs; i++) {
1855 atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1856 }
1857
1858 /* Global atomics don't have access flags because they assume that the
1859 * address may be non-uniform.
1860 */
1861 if (nir_intrinsic_has_access(atomic))
1862 nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1863
1864 assert(intrin->def.num_components == 1);
1865 nir_def_init(&atomic->instr, &atomic->def, 1,
1866 intrin->def.bit_size);
1867
1868 assert(atomic->def.bit_size % 8 == 0);
1869
1870 if (addr_format_needs_bounds_check(addr_format)) {
1871 const unsigned atomic_size = atomic->def.bit_size / 8;
1872 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1873
1874 nir_builder_instr_insert(b, &atomic->instr);
1875
1876 nir_pop_if(b, NULL);
1877 return nir_if_phi(b, &atomic->def,
1878 nir_undef(b, 1, atomic->def.bit_size));
1879 } else {
1880 nir_builder_instr_insert(b, &atomic->instr);
1881 return &atomic->def;
1882 }
1883 }
1884
1885 nir_def *
1886 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1887 nir_def *base_addr,
1888 nir_address_format addr_format)
1889 {
1890 switch (deref->deref_type) {
1891 case nir_deref_type_var:
1892 return build_addr_for_var(b, deref->var, addr_format);
1893
1894 case nir_deref_type_ptr_as_array:
1895 case nir_deref_type_array: {
1896 unsigned stride = nir_deref_instr_array_stride(deref);
1897 assert(stride > 0);
1898
1899 unsigned offset_bit_size = addr_get_offset_bit_size(base_addr, addr_format);
1900 nir_def *index = deref->arr.index.ssa;
1901 nir_def *offset;
1902
1903 /* If the access chain has been declared in-bounds, then we know it doesn't
1904 * overflow the type. For nir_deref_type_array, this implies it cannot be
1905 * negative. Also, since types in NIR have a maximum 32-bit size, we know the
1906 * final result will fit in a 32-bit value so we can convert the index to
1907 * 32-bit before multiplying and save ourselves from a 64-bit multiply.
1908 */
1909 if (deref->arr.in_bounds && deref->deref_type == nir_deref_type_array) {
1910 index = nir_u2u32(b, index);
1911 offset = nir_u2uN(b, nir_amul_imm(b, index, stride), offset_bit_size);
1912 } else {
1913 index = nir_i2iN(b, index, offset_bit_size);
1914 offset = nir_amul_imm(b, index, stride);
1915 }
1916
1917 return nir_build_addr_iadd(b, base_addr, addr_format,
1918 deref->modes, offset);
1919 }
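/* Worked example (hypothetical values): for a deref of `arr[i]` with an
 * explicit array stride of 16 bytes, the in-bounds fast path computes
 *
 *    offset = u2uN(amul(u2u32(i), 16), offset_bit_size)
 *
 * and adds it to the parent address; the general path instead converts the
 * index to the offset bit size (sign-extending) before the multiply.
 */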
1920
1921 case nir_deref_type_array_wildcard:
1922 unreachable("Wildcards should be lowered by now");
1923 break;
1924
1925 case nir_deref_type_struct: {
1926 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1927 int offset = glsl_get_struct_field_offset(parent->type,
1928 deref->strct.index);
1929 assert(offset >= 0);
1930 return nir_build_addr_iadd_imm(b, base_addr, addr_format,
1931 deref->modes, offset);
1932 }
1933
1934 case nir_deref_type_cast:
1935 /* Nothing to do here */
1936 return base_addr;
1937 }
1938
1939 unreachable("Invalid NIR deref type");
1940 }
1941
1942 void
1943 nir_lower_explicit_io_instr(nir_builder *b,
1944 nir_intrinsic_instr *intrin,
1945 nir_def *addr,
1946 nir_address_format addr_format)
1947 {
1948 b->cursor = nir_after_instr(&intrin->instr);
1949
1950 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1951 unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1952 unsigned scalar_size = type_scalar_size_bytes(deref->type);
1953 if (vec_stride == 0) {
1954 vec_stride = scalar_size;
1955 } else {
1956 assert(glsl_type_is_vector(deref->type));
1957 assert(vec_stride >= scalar_size);
1958 }
1959
1960 uint32_t align_mul, align_offset;
1961 if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
1962 /* If we don't have an alignment from the deref, assume scalar */
1963 align_mul = scalar_size;
1964 align_offset = 0;
1965 }
1966
1967 /* In order for bounds checking to be correct as per the Vulkan spec,
1968 * we need to check at the individual component granularity. Prior to
1969 * robustness2, we're technically allowed to be sloppy by 16B. Even with
1970 * robustness2, UBO loads are allowed to have a granularity as high as 256B
1971 * depending on hardware limits. However, we have none of that information
1972 * here. Short of adding new address formats, the easiest way to do that
1973 * is to just split any loads and stores into individual components here.
1974 *
1975 * TODO: At some point in the future we may want to add more ops similar to
1976 * nir_intrinsic_load_global_constant_bounded and make bounds checking the
1977 * back-end's problem. Another option would be to somehow plumb more of
1978 * that information through to nir_lower_explicit_io. For now, however,
1979 * scalarizing is at least correct.
1980 */
1981 bool scalarize = vec_stride > scalar_size ||
1982 addr_format_needs_bounds_check(addr_format);
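/* Example (assuming a bounds-checked address format): a load_deref of a
 * tightly packed vec4 of 32-bit floats is split into four scalar loads at
 * component offsets 0, 4, 8 and 12, each bounds-checked on its own, and the
 * results are recombined with nir_vec() below.
 */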
1983
1984 switch (intrin->intrinsic) {
1985 case nir_intrinsic_load_deref: {
1986 nir_def *value;
1987 if (scalarize) {
1988 nir_def *comps[NIR_MAX_VEC_COMPONENTS] = {
1989 NULL,
1990 };
1991 for (unsigned i = 0; i < intrin->num_components; i++) {
1992 unsigned comp_offset = i * vec_stride;
1993 nir_def *comp_addr = nir_build_addr_iadd_imm(b, addr, addr_format,
1994 deref->modes,
1995 comp_offset);
1996 comps[i] = build_explicit_io_load(b, intrin, comp_addr,
1997 addr_format, deref->modes,
1998 align_mul,
1999 (align_offset + comp_offset) %
2000 align_mul,
2001 1);
2002 }
2003 value = nir_vec(b, comps, intrin->num_components);
2004 } else {
2005 value = build_explicit_io_load(b, intrin, addr, addr_format,
2006 deref->modes, align_mul, align_offset,
2007 intrin->num_components);
2008 }
2009 nir_def_rewrite_uses(&intrin->def, value);
2010 break;
2011 }
2012
2013 case nir_intrinsic_store_deref: {
2014 nir_def *value = intrin->src[1].ssa;
2015 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
2016 if (scalarize) {
2017 for (unsigned i = 0; i < intrin->num_components; i++) {
2018 if (!(write_mask & (1 << i)))
2019 continue;
2020
2021 unsigned comp_offset = i * vec_stride;
2022 nir_def *comp_addr = nir_build_addr_iadd_imm(b, addr, addr_format,
2023 deref->modes,
2024 comp_offset);
2025 build_explicit_io_store(b, intrin, comp_addr, addr_format,
2026 deref->modes, align_mul,
2027 (align_offset + comp_offset) % align_mul,
2028 nir_channel(b, value, i), 1);
2029 }
2030 } else {
2031 build_explicit_io_store(b, intrin, addr, addr_format,
2032 deref->modes, align_mul, align_offset,
2033 value, write_mask);
2034 }
2035 break;
2036 }
2037
2038 case nir_intrinsic_load_deref_block_intel: {
2039 nir_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
2040 deref->modes,
2041 align_mul, align_offset,
2042 intrin->num_components);
2043 nir_def_rewrite_uses(&intrin->def, value);
2044 break;
2045 }
2046
2047 case nir_intrinsic_store_deref_block_intel: {
2048 nir_def *value = intrin->src[1].ssa;
2049 const nir_component_mask_t write_mask = 0;
2050 build_explicit_io_store(b, intrin, addr, addr_format,
2051 deref->modes, align_mul, align_offset,
2052 value, write_mask);
2053 break;
2054 }
2055
2056 default: {
2057 nir_def *value =
2058 build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
2059 nir_def_rewrite_uses(&intrin->def, value);
2060 break;
2061 }
2062 }
2063
2064 nir_instr_remove(&intrin->instr);
2065 }
2066
2067 bool
2068 nir_get_explicit_deref_align(nir_deref_instr *deref,
2069 bool default_to_type_align,
2070 uint32_t *align_mul,
2071 uint32_t *align_offset)
2072 {
2073 if (deref->deref_type == nir_deref_type_var) {
2074 /* If we see a variable, align_mul is effectively infinite because we
2075 * know the offset exactly (up to the offset of the base pointer for the
2076 * given variable mode). We have to pick something so we choose 256B
2077 * as an arbitrary alignment which seems high enough for any reasonable
2078 * wide-load use-case. Back-ends should clamp alignments down if 256B
2079 * is too large for some reason.
2080 */
2081 *align_mul = 256;
2082 *align_offset = deref->var->data.driver_location % 256;
2083 return true;
2084 }
2085
2086 /* If we're a cast deref that has an alignment, use that. */
2087 if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
2088 *align_mul = deref->cast.align_mul;
2089 *align_offset = deref->cast.align_offset;
2090 return true;
2091 }
2092
2093 /* Otherwise, we need to compute the alignment based on the parent */
2094 nir_deref_instr *parent = nir_deref_instr_parent(deref);
2095 if (parent == NULL) {
2096 assert(deref->deref_type == nir_deref_type_cast);
2097 if (default_to_type_align) {
2098 /* If we don't have a parent, assume the type's alignment, if any. */
2099 unsigned type_align = glsl_get_explicit_alignment(deref->type);
2100 if (type_align == 0)
2101 return false;
2102
2103 *align_mul = type_align;
2104 *align_offset = 0;
2105 return true;
2106 } else {
2107 return false;
2108 }
2109 }
2110
2111 uint32_t parent_mul, parent_offset;
2112 if (!nir_get_explicit_deref_align(parent, default_to_type_align,
2113 &parent_mul, &parent_offset))
2114 return false;
2115
2116 switch (deref->deref_type) {
2117 case nir_deref_type_var:
2118 unreachable("Handled above");
2119
2120 case nir_deref_type_array:
2121 case nir_deref_type_array_wildcard:
2122 case nir_deref_type_ptr_as_array: {
2123 const unsigned stride = nir_deref_instr_array_stride(deref);
2124 if (stride == 0)
2125 return false;
2126
2127 if (deref->deref_type != nir_deref_type_array_wildcard &&
2128 nir_src_is_const(deref->arr.index)) {
2129 unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
2130 *align_mul = parent_mul;
2131 *align_offset = (parent_offset + offset) % parent_mul;
2132 } else {
2133 /* If this is a wildcard or an indirect deref, we have to go with the
2134 * power-of-two gcd.
2135 */
2136 *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
2137 *align_offset = parent_offset % *align_mul;
2138 }
2139 return true;
2140 }
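/* Worked example (hypothetical values): with parent_mul = 16 and an indirect
 * index into an array with stride 12, the largest power of two dividing the
 * stride is 4, so align_mul becomes MIN2(16, 4) = 4 and align_offset becomes
 * parent_offset % 4.
 */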
2141
2142 case nir_deref_type_struct: {
2143 const int offset = glsl_get_struct_field_offset(parent->type,
2144 deref->strct.index);
2145 if (offset < 0)
2146 return false;
2147
2148 *align_mul = parent_mul;
2149 *align_offset = (parent_offset + offset) % parent_mul;
2150 return true;
2151 }
2152
2153 case nir_deref_type_cast:
2154 /* We handled the explicit alignment case above. */
2155 assert(deref->cast.align_mul == 0);
2156 *align_mul = parent_mul;
2157 *align_offset = parent_offset;
2158 return true;
2159 }
2160
2161 unreachable("Invalid deref_instr_type");
2162 }
2163
2164 static void
2165 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
2166 nir_address_format addr_format)
2167 {
2168 /* Ignore samplers/textures, because they are handled by other passes like `nir_lower_samplers`.
2169 * Only do this for uniform-mode derefs; otherwise it would break GL bindless
2170 * texture handles stored in UBOs.
2171 */
2172 if (nir_deref_mode_is_in_set(deref, nir_var_uniform) &&
2173 (glsl_type_is_sampler(deref->type) ||
2174 glsl_type_is_texture(deref->type)))
2175 return;
2176
2177 /* Just delete the deref if it's not used. We can't use
2178 * nir_deref_instr_remove_if_unused here because it may remove more than
2179 * one deref which could break our list walking since we walk the list
2180 * backwards.
2181 */
2182 if (nir_def_is_unused(&deref->def)) {
2183 nir_instr_remove(&deref->instr);
2184 return;
2185 }
2186
2187 b->cursor = nir_after_instr(&deref->instr);
2188
2189 nir_def *base_addr = NULL;
2190 if (deref->deref_type != nir_deref_type_var) {
2191 base_addr = deref->parent.ssa;
2192 }
2193
2194 nir_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
2195 addr_format);
2196 assert(addr->bit_size == deref->def.bit_size);
2197 assert(addr->num_components == deref->def.num_components);
2198
2199 nir_instr_remove(&deref->instr);
2200 nir_def_rewrite_uses(&deref->def, addr);
2201 }
2202
2203 static void
2204 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
2205 nir_address_format addr_format)
2206 {
2207 nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
2208 }
2209
2210 static void
2211 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
2212 nir_address_format addr_format)
2213 {
2214 b->cursor = nir_after_instr(&intrin->instr);
2215
2216 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2217
2218 assert(glsl_type_is_array(deref->type));
2219 assert(glsl_get_length(deref->type) == 0);
2220 assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
2221 unsigned stride = glsl_get_explicit_stride(deref->type);
2222 assert(stride > 0);
2223
2224 nir_def *addr = &deref->def;
2225
2226 nir_def *offset, *size;
2227 switch (addr_format) {
2228 case nir_address_format_64bit_global_32bit_offset:
2229 case nir_address_format_64bit_bounded_global:
2230 offset = nir_channel(b, addr, 3);
2231 size = nir_channel(b, addr, 2);
2232 break;
2233
2234 case nir_address_format_32bit_index_offset:
2235 case nir_address_format_32bit_index_offset_pack64:
2236 case nir_address_format_vec2_index_32bit_offset: {
2237 offset = addr_to_offset(b, addr, addr_format);
2238 nir_def *index = addr_to_index(b, addr, addr_format);
2239 unsigned access = nir_intrinsic_access(intrin);
2240 size = nir_get_ssbo_size(b, index, .access = access);
2241 break;
2242 }
2243
2244 default:
2245 unreachable("Cannot determine SSBO size");
2246 }
2247
2248 nir_def *remaining = nir_usub_sat(b, size, offset);
2249 nir_def *arr_size = nir_udiv_imm(b, remaining, stride);
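/* Worked example (hypothetical values): with a 256-byte SSBO binding, a
 * 16-byte member offset and a 16-byte array stride, the unsized array length
 * is usub_sat(256, 16) / 16 = 15 elements; usub_sat() clamps the result to
 * zero if the binding is smaller than the offset.
 */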
2250
2251 nir_def_rewrite_uses(&intrin->def, arr_size);
2252 nir_instr_remove(&intrin->instr);
2253 }
2254
2255 static void
2256 lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
2257 nir_address_format addr_format)
2258 {
2259 if (addr_format_is_global(addr_format, 0)) {
2260 /* If the address format is always global, then the driver can use
2261 * global addresses regardless of the mode. In that case, don't create
2262 * a check, just whack the intrinsic to addr_mode_is and delegate to the
2263 * driver lowering.
2264 */
2265 intrin->intrinsic = nir_intrinsic_addr_mode_is;
2266 return;
2267 }
2268
2269 nir_def *addr = intrin->src[0].ssa;
2270
2271 b->cursor = nir_instr_remove(&intrin->instr);
2272
2273 nir_def *is_mode =
2274 build_runtime_addr_mode_check(b, addr, addr_format,
2275 nir_intrinsic_memory_modes(intrin));
2276
2277 nir_def_rewrite_uses(&intrin->def, is_mode);
2278 }
2279
2280 static bool
2281 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
2282 nir_address_format addr_format)
2283 {
2284 bool progress = false;
2285
2286 nir_builder b = nir_builder_create(impl);
2287
2288 /* Walk in reverse order so that we can see the full deref chain when we
2289 * lower the access operations. We lower them assuming that the derefs
2290 * will be turned into address calculations later.
2291 */
2292 nir_foreach_block_reverse(block, impl) {
2293 nir_foreach_instr_reverse_safe(instr, block) {
2294 switch (instr->type) {
2295 case nir_instr_type_deref: {
2296 nir_deref_instr *deref = nir_instr_as_deref(instr);
2297 if (nir_deref_mode_is_in_set(deref, modes)) {
2298 lower_explicit_io_deref(&b, deref, addr_format);
2299 progress = true;
2300 }
2301 break;
2302 }
2303
2304 case nir_instr_type_intrinsic: {
2305 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2306 switch (intrin->intrinsic) {
2307 case nir_intrinsic_load_deref:
2308 case nir_intrinsic_store_deref:
2309 case nir_intrinsic_load_deref_block_intel:
2310 case nir_intrinsic_store_deref_block_intel:
2311 case nir_intrinsic_deref_atomic:
2312 case nir_intrinsic_deref_atomic_swap: {
2313 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2314 if (nir_deref_mode_is_in_set(deref, modes)) {
2315 lower_explicit_io_access(&b, intrin, addr_format);
2316 progress = true;
2317 }
2318 break;
2319 }
2320
2321 case nir_intrinsic_deref_buffer_array_length: {
2322 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2323 if (nir_deref_mode_is_in_set(deref, modes)) {
2324 lower_explicit_io_array_length(&b, intrin, addr_format);
2325 progress = true;
2326 }
2327 break;
2328 }
2329
2330 case nir_intrinsic_deref_mode_is: {
2331 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2332 if (nir_deref_mode_is_in_set(deref, modes)) {
2333 lower_explicit_io_mode_check(&b, intrin, addr_format);
2334 progress = true;
2335 }
2336 break;
2337 }
2338
2339 case nir_intrinsic_launch_mesh_workgroups_with_payload_deref: {
2340 if (modes & nir_var_mem_task_payload) {
2341 /* Get address and size of the payload variable. */
2342 nir_deref_instr *deref = nir_src_as_deref(intrin->src[1]);
2343 assert(deref->deref_type == nir_deref_type_var);
2344 unsigned base = deref->var->data.explicit_location;
2345 unsigned size = glsl_get_explicit_size(deref->var->type, false);
2346
2347 /* Replace the current instruction with the explicit intrinsic. */
2348 nir_def *dispatch_3d = intrin->src[0].ssa;
2349 b.cursor = nir_instr_remove(instr);
2350 nir_launch_mesh_workgroups(&b, dispatch_3d, .base = base, .range = size);
2351 progress = true;
2352 }
2353
2354 break;
2355 }
2356
2357 default:
2358 break;
2359 }
2360 break;
2361 }
2362
2363 default:
2364 /* Nothing to do */
2365 break;
2366 }
2367 }
2368 }
2369
2370 if (progress) {
2371 nir_metadata_preserve(impl, nir_metadata_none);
2372 } else {
2373 nir_metadata_preserve(impl, nir_metadata_all);
2374 }
2375
2376 return progress;
2377 }
2378
2379 /** Lower explicitly laid out I/O access to byte offset/address intrinsics
2380 *
2381 * This pass is intended to be used for any I/O which touches memory external
2382 * to the shader or which is directly visible to the client. It requires that
2383 * all data types in the given modes have explicit stride/offset decorations
2384 * to tell it exactly how to calculate the offset/address for the given load,
2385 * store, or atomic operation. If the offset/stride information does not come
2386 * from the client explicitly (as with shared variables in GL or Vulkan),
2387 * nir_lower_vars_to_explicit_types() can be used to add them.
2388 *
2389 * Unlike nir_lower_io, this pass is fully capable of handling incomplete
2390 * pointer chains which may contain cast derefs. It does so by walking the
2391 * deref chain backwards and simply replacing each deref, one at a time, with
2392 * the appropriate address calculation. The pass takes a nir_address_format
2393 * parameter which describes how the offset or address is to be represented
2394 * during calculations. By ensuring that the address is always in a
2395 * consistent format, pointers can safely be conjured from thin air by the
2396 * driver, stored to variables, passed through phis, etc.
2397 *
2398 * The one exception to the simple algorithm described above is for handling
2399 * row-major matrices in which case we may look down one additional level of
2400 * the deref chain.
2401 *
2402 * This pass is also capable of handling OpenCL generic pointers. If the
2403 * address mode is global, it will lower any ambiguous (more than one mode)
2404 * access to global and pass through the deref_mode_is run-time checks as
2405 * addr_mode_is. This assumes the driver has somehow mapped shared and
2406 * scratch memory to the global address space. For other modes such as
2407 * 62bit_generic, there is an enum embedded in the address and we lower
2408 * ambiguous access to an if-ladder and deref_mode_is to a check against the
2409 * embedded enum. If nir_lower_explicit_io is called on any shader that
2410 * contains generic pointers, it must either be used on all of the generic
2411 * modes or none.
2412 */
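/* A minimal usage sketch (assumptions: a Vulkan-style driver that has already
 * run nir_lower_vars_to_explicit_types() for shared memory):
 *
 *    nir_lower_explicit_io(nir, nir_var_mem_ssbo,
 *                          nir_address_format_64bit_bounded_global);
 *    nir_lower_explicit_io(nir, nir_var_mem_shared,
 *                          nir_address_format_32bit_offset);
 */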
2413 bool
2414 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
2415 nir_address_format addr_format)
2416 {
2417 bool progress = false;
2418
2419 nir_foreach_function_impl(impl, shader) {
2420 if (impl && nir_lower_explicit_io_impl(impl, modes, addr_format))
2421 progress = true;
2422 }
2423
2424 return progress;
2425 }
2426
2427 static bool
2428 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
2429 nir_variable_mode modes,
2430 glsl_type_size_align_func type_info)
2431 {
2432 bool progress = false;
2433
2434 nir_foreach_block(block, impl) {
2435 nir_foreach_instr(instr, block) {
2436 if (instr->type != nir_instr_type_deref)
2437 continue;
2438
2439 nir_deref_instr *deref = nir_instr_as_deref(instr);
2440 if (!nir_deref_mode_is_in_set(deref, modes))
2441 continue;
2442
2443 unsigned size, alignment;
2444 const struct glsl_type *new_type =
2445 glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
2446 if (new_type != deref->type) {
2447 progress = true;
2448 deref->type = new_type;
2449 }
2450 if (deref->deref_type == nir_deref_type_cast) {
2451 /* See also glsl_type::get_explicit_type_for_size_align() */
2452 unsigned new_stride = align(size, alignment);
2453 if (new_stride != deref->cast.ptr_stride) {
2454 deref->cast.ptr_stride = new_stride;
2455 progress = true;
2456 }
2457 }
2458 }
2459 }
2460
2461 if (progress) {
2462 nir_metadata_preserve(impl, nir_metadata_block_index |
2463 nir_metadata_dominance |
2464 nir_metadata_live_defs |
2465 nir_metadata_loop_analysis);
2466 } else {
2467 nir_metadata_preserve(impl, nir_metadata_all);
2468 }
2469
2470 return progress;
2471 }
2472
2473 static bool
2474 lower_vars_to_explicit(nir_shader *shader,
2475 struct exec_list *vars, nir_variable_mode mode,
2476 glsl_type_size_align_func type_info)
2477 {
2478 bool progress = false;
2479 unsigned offset;
2480 switch (mode) {
2481 case nir_var_uniform:
2482 assert(shader->info.stage == MESA_SHADER_KERNEL);
2483 offset = 0;
2484 break;
2485 case nir_var_function_temp:
2486 case nir_var_shader_temp:
2487 offset = shader->scratch_size;
2488 break;
2489 case nir_var_mem_shared:
2490 offset = shader->info.shared_size;
2491 break;
2492 case nir_var_mem_task_payload:
2493 offset = shader->info.task_payload_size;
2494 break;
2495 case nir_var_mem_node_payload:
2496 assert(!shader->info.cs.node_payloads_size);
2497 offset = 0;
2498 break;
2499 case nir_var_mem_global:
2500 offset = shader->global_mem_size;
2501 break;
2502 case nir_var_mem_constant:
2503 offset = shader->constant_data_size;
2504 break;
2505 case nir_var_shader_call_data:
2506 case nir_var_ray_hit_attrib:
2507 case nir_var_mem_node_payload_in:
2508 offset = 0;
2509 break;
2510 default:
2511 unreachable("Unsupported mode");
2512 }
2513 nir_foreach_variable_in_list(var, vars) {
2514 if (var->data.mode != mode)
2515 continue;
2516
2517 unsigned size, alignment;
2518 const struct glsl_type *explicit_type =
2519 glsl_get_explicit_type_for_size_align(var->type, type_info,
2520 &size, &alignment);
2521
2522 if (explicit_type != var->type)
2523 var->type = explicit_type;
2524
2525 UNUSED bool is_empty_struct =
2526 glsl_type_is_struct_or_ifc(explicit_type) &&
2527 glsl_get_length(explicit_type) == 0;
2528
2529 assert(util_is_power_of_two_nonzero(alignment) || is_empty_struct ||
2530 glsl_type_is_cmat(glsl_without_array(explicit_type)));
2531 assert(util_is_power_of_two_or_zero(var->data.alignment));
2532 alignment = MAX2(alignment, var->data.alignment);
2533
2534 var->data.driver_location = ALIGN_POT(offset, alignment);
2535 offset = var->data.driver_location + size;
2536 progress = true;
2537 }
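/* Worked example (hypothetical sizes) for the loop above: starting at offset
 * 0, a variable with size 12 and alignment 16 gets driver_location 0 and
 * advances the offset to 12; a following variable with size 4 and alignment 4
 * is placed at ALIGN_POT(12, 4) = 12, moving the offset to 16.
 */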
2538
2539 switch (mode) {
2540 case nir_var_uniform:
2541 assert(shader->info.stage == MESA_SHADER_KERNEL);
2542 shader->num_uniforms = offset;
2543 break;
2544 case nir_var_shader_temp:
2545 case nir_var_function_temp:
2546 shader->scratch_size = offset;
2547 break;
2548 case nir_var_mem_shared:
2549 shader->info.shared_size = offset;
2550 break;
2551 case nir_var_mem_task_payload:
2552 shader->info.task_payload_size = offset;
2553 break;
2554 case nir_var_mem_node_payload:
2555 shader->info.cs.node_payloads_size = offset;
2556 break;
2557 case nir_var_mem_global:
2558 shader->global_mem_size = offset;
2559 break;
2560 case nir_var_mem_constant:
2561 shader->constant_data_size = offset;
2562 break;
2563 case nir_var_shader_call_data:
2564 case nir_var_ray_hit_attrib:
2565 case nir_var_mem_node_payload_in:
2566 break;
2567 default:
2568 unreachable("Unsupported mode");
2569 }
2570
2571 return progress;
2572 }
2573
2574 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
2575 * generic pointers, it must either be used on all of the generic modes or
2576 * none.
2577 */
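/* A minimal usage sketch (assuming the driver wants C-like natural layout for
 * shared and scratch variables; glsl_get_natural_size_align_bytes() is one
 * suitable size/align callback):
 *
 *    nir_lower_vars_to_explicit_types(nir,
 *                                     nir_var_mem_shared |
 *                                     nir_var_function_temp,
 *                                     glsl_get_natural_size_align_bytes);
 */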
2578 bool
2579 nir_lower_vars_to_explicit_types(nir_shader *shader,
2580 nir_variable_mode modes,
2581 glsl_type_size_align_func type_info)
2582 {
2583 /* TODO: Situations which need to be handled to support more modes:
2584 * - row-major matrices
2585 * - compact shader inputs/outputs
2586 * - interface types
2587 */
2588 ASSERTED nir_variable_mode supported =
2589 nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
2590 nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
2591 nir_var_shader_call_data | nir_var_ray_hit_attrib |
2592 nir_var_mem_task_payload | nir_var_mem_node_payload |
2593 nir_var_mem_node_payload_in;
2594 assert(!(modes & ~supported) && "unsupported");
2595
2596 bool progress = false;
2597
2598 if (modes & nir_var_uniform)
2599 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
2600 if (modes & nir_var_mem_global)
2601 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);
2602
2603 if (modes & nir_var_mem_shared) {
2604 assert(!shader->info.shared_memory_explicit_layout);
2605 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
2606 }
2607
2608 if (modes & nir_var_shader_temp)
2609 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
2610 if (modes & nir_var_mem_constant)
2611 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
2612 if (modes & nir_var_shader_call_data)
2613 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
2614 if (modes & nir_var_ray_hit_attrib)
2615 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);
2616 if (modes & nir_var_mem_task_payload)
2617 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_task_payload, type_info);
2618 if (modes & nir_var_mem_node_payload)
2619 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload, type_info);
2620 if (modes & nir_var_mem_node_payload_in)
2621 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload_in, type_info);
2622
2623 nir_foreach_function_impl(impl, shader) {
2624 if (modes & nir_var_function_temp)
2625 progress |= lower_vars_to_explicit(shader, &impl->locals, nir_var_function_temp, type_info);
2626
2627 progress |= nir_lower_vars_to_explicit_types_impl(impl, modes, type_info);
2628 }
2629
2630 return progress;
2631 }
2632
2633 static void
2634 write_constant(void *dst, size_t dst_size,
2635 const nir_constant *c, const struct glsl_type *type)
2636 {
2637 if (c->is_null_constant) {
2638 memset(dst, 0, dst_size);
2639 return;
2640 }
2641
2642 if (glsl_type_is_vector_or_scalar(type)) {
2643 const unsigned num_components = glsl_get_vector_elements(type);
2644 const unsigned bit_size = glsl_get_bit_size(type);
2645 if (bit_size == 1) {
2646 /* Booleans are special-cased to be 32-bit
2647 *
2648 * TODO: Make the native bool bit_size an option.
2649 */
2650 assert(num_components * 4 <= dst_size);
2651 for (unsigned i = 0; i < num_components; i++) {
2652 int32_t b32 = -(int)c->values[i].b;
2653 memcpy((char *)dst + i * 4, &b32, 4);
2654 }
2655 } else {
2656 assert(bit_size >= 8 && bit_size % 8 == 0);
2657 const unsigned byte_size = bit_size / 8;
2658 assert(num_components * byte_size <= dst_size);
2659 for (unsigned i = 0; i < num_components; i++) {
2660 /* Annoyingly, thanks to packed structs, we can't make any
2661 * assumptions about the alignment of dst. To avoid any strange
2662 * issues with unaligned writes, we always use memcpy.
2663 */
2664 memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
2665 }
2666 }
2667 } else if (glsl_type_is_array_or_matrix(type)) {
2668 const unsigned array_len = glsl_get_length(type);
2669 const unsigned stride = glsl_get_explicit_stride(type);
2670 assert(stride > 0);
2671 const struct glsl_type *elem_type = glsl_get_array_element(type);
2672 for (unsigned i = 0; i < array_len; i++) {
2673 unsigned elem_offset = i * stride;
2674 assert(elem_offset < dst_size);
2675 write_constant((char *)dst + elem_offset, dst_size - elem_offset,
2676 c->elements[i], elem_type);
2677 }
2678 } else {
2679 assert(glsl_type_is_struct_or_ifc(type));
2680 const unsigned num_fields = glsl_get_length(type);
2681 for (unsigned i = 0; i < num_fields; i++) {
2682 const int field_offset = glsl_get_struct_field_offset(type, i);
2683 assert(field_offset >= 0 && field_offset < dst_size);
2684 const struct glsl_type *field_type = glsl_get_struct_field(type, i);
2685 write_constant((char *)dst + field_offset, dst_size - field_offset,
2686 c->elements[i], field_type);
2687 }
2688 }
2689 }
2690
2691 void
2692 nir_gather_explicit_io_initializers(nir_shader *shader,
2693 void *dst, size_t dst_size,
2694 nir_variable_mode mode)
2695 {
2696 /* It doesn't really make sense to gather initializers for more than one
2697 * mode at a time. If this ever becomes well-defined, we can drop the
2698 * assert then.
2699 */
2700 assert(util_bitcount(mode) == 1);
2701
2702 nir_foreach_variable_with_modes(var, shader, mode) {
2703 assert(var->data.driver_location < dst_size);
2704 write_constant((char *)dst + var->data.driver_location,
2705 dst_size - var->data.driver_location,
2706 var->constant_initializer, var->type);
2707 }
2708 }
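/* One way a driver might gather the constant initializer data (a sketch,
 * assuming constant variables have already been laid out for
 * nir_var_mem_constant):
 *
 *    void *data = malloc(shader->constant_data_size);
 *    nir_gather_explicit_io_initializers(shader, data,
 *                                        shader->constant_data_size,
 *                                        nir_var_mem_constant);
 */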
2709
2710 /**
2711 * Return the offset source number for a load/store intrinsic or -1 if there's no offset.
2712 */
2713 int
2714 nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
2715 {
2716 switch (instr->intrinsic) {
2717 case nir_intrinsic_load_input:
2718 case nir_intrinsic_load_output:
2719 case nir_intrinsic_load_shared:
2720 case nir_intrinsic_load_task_payload:
2721 case nir_intrinsic_load_uniform:
2722 case nir_intrinsic_load_kernel_input:
2723 case nir_intrinsic_load_global:
2724 case nir_intrinsic_load_global_2x32:
2725 case nir_intrinsic_load_global_constant:
2726 case nir_intrinsic_load_scratch:
2727 case nir_intrinsic_load_fs_input_interp_deltas:
2728 case nir_intrinsic_shared_atomic:
2729 case nir_intrinsic_shared_atomic_swap:
2730 case nir_intrinsic_task_payload_atomic:
2731 case nir_intrinsic_task_payload_atomic_swap:
2732 case nir_intrinsic_global_atomic:
2733 case nir_intrinsic_global_atomic_swap:
2734 return 0;
2735 case nir_intrinsic_load_ubo:
2736 case nir_intrinsic_load_ssbo:
2737 case nir_intrinsic_load_input_vertex:
2738 case nir_intrinsic_load_per_vertex_input:
2739 case nir_intrinsic_load_per_vertex_output:
2740 case nir_intrinsic_load_per_primitive_output:
2741 case nir_intrinsic_load_interpolated_input:
2742 case nir_intrinsic_store_output:
2743 case nir_intrinsic_store_shared:
2744 case nir_intrinsic_store_task_payload:
2745 case nir_intrinsic_store_global:
2746 case nir_intrinsic_store_global_2x32:
2747 case nir_intrinsic_store_scratch:
2748 case nir_intrinsic_ssbo_atomic:
2749 case nir_intrinsic_ssbo_atomic_swap:
2750 return 1;
2751 case nir_intrinsic_store_ssbo:
2752 case nir_intrinsic_store_per_vertex_output:
2753 case nir_intrinsic_store_per_primitive_output:
2754 return 2;
2755 default:
2756 return -1;
2757 }
2758 }
2759
2760 /**
2761 * Return the offset source for a load/store intrinsic.
2762 */
2763 nir_src *
2764 nir_get_io_offset_src(nir_intrinsic_instr *instr)
2765 {
2766 const int idx = nir_get_io_offset_src_number(instr);
2767 return idx >= 0 ? &instr->src[idx] : NULL;
2768 }
2769
2770 /**
2771 * Return the vertex index source number for a load/store per_vertex intrinsic or -1 if there's no vertex index.
2772 */
2773 int
2774 nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr)
2775 {
2776 switch (instr->intrinsic) {
2777 case nir_intrinsic_load_per_vertex_input:
2778 case nir_intrinsic_load_per_vertex_output:
2779 case nir_intrinsic_load_per_primitive_output:
2780 return 0;
2781 case nir_intrinsic_store_per_vertex_output:
2782 case nir_intrinsic_store_per_primitive_output:
2783 return 1;
2784 default:
2785 return -1;
2786 }
2787 }
2788
2789 /**
2790 * Return the vertex index source for a load/store per_vertex intrinsic.
2791 */
2792 nir_src *
2793 nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr)
2794 {
2795 const int idx = nir_get_io_arrayed_index_src_number(instr);
2796 return idx >= 0 ? &instr->src[idx] : NULL;
2797 }
2798
2799 /**
2800 * Return the numeric constant that identifies a NULL pointer for the given
2801 * address format.
2802 */
2803 const nir_const_value *
2804 nir_address_format_null_value(nir_address_format addr_format)
2805 {
2806 const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
2807 [nir_address_format_32bit_global] = { { 0 } },
2808 [nir_address_format_2x32bit_global] = { { 0 } },
2809 [nir_address_format_64bit_global] = { { 0 } },
2810 [nir_address_format_64bit_global_32bit_offset] = { { 0 } },
2811 [nir_address_format_64bit_bounded_global] = { { 0 } },
2812 [nir_address_format_32bit_index_offset] = { { .u32 = ~0 }, { .u32 = ~0 } },
2813 [nir_address_format_32bit_index_offset_pack64] = { { .u64 = ~0ull } },
2814 [nir_address_format_vec2_index_32bit_offset] = { { .u32 = ~0 }, { .u32 = ~0 }, { .u32 = ~0 } },
2815 [nir_address_format_32bit_offset] = { { .u32 = ~0 } },
2816 [nir_address_format_32bit_offset_as_64bit] = { { .u64 = ~0ull } },
2817 [nir_address_format_62bit_generic] = { { .u64 = 0 } },
2818 [nir_address_format_logical] = { { .u32 = ~0 } },
2819 };
2820
2821 assert(addr_format < ARRAY_SIZE(null_values));
2822 return null_values[addr_format];
2823 }
2824
2825 nir_def *
2826 nir_build_addr_ieq(nir_builder *b, nir_def *addr0, nir_def *addr1,
2827 nir_address_format addr_format)
2828 {
2829 switch (addr_format) {
2830 case nir_address_format_32bit_global:
2831 case nir_address_format_2x32bit_global:
2832 case nir_address_format_64bit_global:
2833 case nir_address_format_64bit_bounded_global:
2834 case nir_address_format_32bit_index_offset:
2835 case nir_address_format_vec2_index_32bit_offset:
2836 case nir_address_format_32bit_offset:
2837 case nir_address_format_62bit_generic:
2838 return nir_ball_iequal(b, addr0, addr1);
2839
2840 case nir_address_format_64bit_global_32bit_offset:
2841 return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
2842 nir_channels(b, addr1, 0xb));
2843
2844 case nir_address_format_32bit_offset_as_64bit:
2845 assert(addr0->num_components == 1 && addr1->num_components == 1);
2846 return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
2847
2848 case nir_address_format_32bit_index_offset_pack64:
2849 assert(addr0->num_components == 1 && addr1->num_components == 1);
2850 return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
2851
2852 case nir_address_format_logical:
2853 unreachable("Unsupported address format");
2854 }
2855
2856 unreachable("Invalid address format");
2857 }
2858
2859 nir_def *
2860 nir_build_addr_isub(nir_builder *b, nir_def *addr0, nir_def *addr1,
2861 nir_address_format addr_format)
2862 {
2863 switch (addr_format) {
2864 case nir_address_format_32bit_global:
2865 case nir_address_format_64bit_global:
2866 case nir_address_format_32bit_offset:
2867 case nir_address_format_32bit_index_offset_pack64:
2868 case nir_address_format_62bit_generic:
2869 assert(addr0->num_components == 1);
2870 assert(addr1->num_components == 1);
2871 return nir_isub(b, addr0, addr1);
2872
2873 case nir_address_format_2x32bit_global:
2874 return nir_isub(b, addr_to_global(b, addr0, addr_format),
2875 addr_to_global(b, addr1, addr_format));
2876
2877 case nir_address_format_32bit_offset_as_64bit:
2878 assert(addr0->num_components == 1);
2879 assert(addr1->num_components == 1);
2880 return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
2881
2882 case nir_address_format_64bit_global_32bit_offset:
2883 case nir_address_format_64bit_bounded_global:
2884 return nir_isub(b, addr_to_global(b, addr0, addr_format),
2885 addr_to_global(b, addr1, addr_format));
2886
2887 case nir_address_format_32bit_index_offset:
2888 assert(addr0->num_components == 2);
2889 assert(addr1->num_components == 2);
2890 /* Assume the same buffer index. */
2891 return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
2892
2893 case nir_address_format_vec2_index_32bit_offset:
2894 assert(addr0->num_components == 3);
2895 assert(addr1->num_components == 3);
2896 /* Assume the same buffer index. */
2897 return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
2898
2899 case nir_address_format_logical:
2900 unreachable("Unsupported address format");
2901 }
2902
2903 unreachable("Invalid address format");
2904 }
2905
2906 static bool
2907 is_input(nir_intrinsic_instr *intrin)
2908 {
2909 return intrin->intrinsic == nir_intrinsic_load_input ||
2910 intrin->intrinsic == nir_intrinsic_load_input_vertex ||
2911 intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
2912 intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
2913 intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
2914 }
2915
2916 static bool
2917 is_output(nir_intrinsic_instr *intrin)
2918 {
2919 return intrin->intrinsic == nir_intrinsic_load_output ||
2920 intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
2921 intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
2922 intrin->intrinsic == nir_intrinsic_store_output ||
2923 intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2924 intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
2925 }
2926
2927 static bool
2928 is_dual_slot(nir_intrinsic_instr *intrin)
2929 {
2930 if (intrin->intrinsic == nir_intrinsic_store_output ||
2931 intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2932 intrin->intrinsic == nir_intrinsic_store_per_primitive_output) {
2933 return nir_src_bit_size(intrin->src[0]) == 64 &&
2934 nir_src_num_components(intrin->src[0]) >= 3;
2935 }
2936
2937 return intrin->def.bit_size == 64 &&
2938 intrin->def.num_components >= 3;
2939 }
2940
2941 /**
2942 * This pass adds constant offsets to instr->const_index[0] for input/output
2943 * intrinsics, and resets the offset source to 0. Non-constant offsets remain
2944 * unchanged - since we don't know what part of a compound variable is
2945 * accessed, we allocate storage for the entire thing. For drivers that use
2946 * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
2947 * the offset source will be 0, so that they don't have to add it in manually.
2948 */
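/* Example of the transform (hypothetical intrinsic): a store_output with
 * base=1 and a constant offset source of 2 becomes base=3 with its offset
 * source rewritten to 0, its io_semantics.location bumped by 2 to match, and
 * num_slots reduced to the size of the single slot actually written.
 */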
2949
2950 static bool
2951 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
2952 nir_variable_mode modes)
2953 {
2954 bool progress = false;
2955 nir_foreach_instr_safe(instr, block) {
2956 if (instr->type != nir_instr_type_intrinsic)
2957 continue;
2958
2959 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2960
2961 if (((modes & nir_var_shader_in) && is_input(intrin)) ||
2962 ((modes & nir_var_shader_out) && is_output(intrin))) {
2963 nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
2964
2965 /* NV_mesh_shader: ignore MS primitive indices. */
2966 if (b->shader->info.stage == MESA_SHADER_MESH &&
2967 sem.location == VARYING_SLOT_PRIMITIVE_INDICES &&
2968 !(b->shader->info.per_primitive_outputs &
2969 BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES)))
2970 continue;
2971
2972 nir_src *offset = nir_get_io_offset_src(intrin);
2973
2974 /* TODO: Better handling of per-view variables here */
2975 if (nir_src_is_const(*offset) &&
2976 !nir_intrinsic_io_semantics(intrin).per_view) {
2977 unsigned off = nir_src_as_uint(*offset);
2978
2979 nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
2980
2981 sem.location += off;
2982 /* non-indirect indexing should reduce num_slots */
2983 sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
2984 nir_intrinsic_set_io_semantics(intrin, sem);
2985
2986 b->cursor = nir_before_instr(&intrin->instr);
2987 nir_src_rewrite(offset, nir_imm_int(b, 0));
2988 progress = true;
2989 }
2990 }
2991 }
2992
2993 return progress;
2994 }
2995
2996 bool
2997 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
2998 {
2999 bool progress = false;
3000
3001 nir_foreach_function_impl(impl, nir) {
3002 bool impl_progress = false;
3003 nir_builder b = nir_builder_create(impl);
3004 nir_foreach_block(block, impl) {
3005 impl_progress |= add_const_offset_to_base_block(block, &b, modes);
3006 }
3007 progress |= impl_progress;
3008 if (impl_progress)
3009 nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
3010 else
3011 nir_metadata_preserve(impl, nir_metadata_all);
3012 }
3013
3014 return progress;
3015 }
3016
3017 bool
3018 nir_lower_color_inputs(nir_shader *nir)
3019 {
3020 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3021 bool progress = false;
3022
3023 nir_builder b = nir_builder_create(impl);
3024
3025 nir_foreach_block(block, impl) {
3026 nir_foreach_instr_safe(instr, block) {
3027 if (instr->type != nir_instr_type_intrinsic)
3028 continue;
3029
3030 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3031
3032 if (intrin->intrinsic != nir_intrinsic_load_input &&
3033 intrin->intrinsic != nir_intrinsic_load_interpolated_input)
3034 continue;
3035
3036 nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
3037
3038 if (sem.location != VARYING_SLOT_COL0 &&
3039 sem.location != VARYING_SLOT_COL1)
3040 continue;
3041
3042 /* Default to FLAT (for load_input) */
3043 enum glsl_interp_mode interp = INTERP_MODE_FLAT;
3044 bool sample = false;
3045 bool centroid = false;
3046
3047 if (intrin->intrinsic == nir_intrinsic_load_interpolated_input) {
3048 nir_intrinsic_instr *baryc =
3049 nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr);
3050
3051 centroid =
3052 baryc->intrinsic == nir_intrinsic_load_barycentric_centroid;
3053 sample =
3054 baryc->intrinsic == nir_intrinsic_load_barycentric_sample;
3055 assert(centroid || sample ||
3056 baryc->intrinsic == nir_intrinsic_load_barycentric_pixel);
3057
3058 interp = nir_intrinsic_interp_mode(baryc);
3059 }
3060
3061 b.cursor = nir_before_instr(instr);
3062 nir_def *load = NULL;
3063
3064 if (sem.location == VARYING_SLOT_COL0) {
3065 load = nir_load_color0(&b);
3066 nir->info.fs.color0_interp = interp;
3067 nir->info.fs.color0_sample = sample;
3068 nir->info.fs.color0_centroid = centroid;
3069 } else {
3070 assert(sem.location == VARYING_SLOT_COL1);
3071 load = nir_load_color1(&b);
3072 nir->info.fs.color1_interp = interp;
3073 nir->info.fs.color1_sample = sample;
3074 nir->info.fs.color1_centroid = centroid;
3075 }
3076
3077 if (intrin->num_components != 4) {
3078 unsigned start = nir_intrinsic_component(intrin);
3079 unsigned count = intrin->num_components;
3080 load = nir_channels(&b, load, BITFIELD_RANGE(start, count));
3081 }
3082
3083 nir_def_rewrite_uses(&intrin->def, load);
3084 nir_instr_remove(instr);
3085 progress = true;
3086 }
3087 }
3088
3089 if (progress) {
3090 nir_metadata_preserve(impl, nir_metadata_dominance |
3091 nir_metadata_block_index);
3092 } else {
3093 nir_metadata_preserve(impl, nir_metadata_all);
3094 }
3095 return progress;
3096 }
3097
3098 bool
3099 nir_io_add_intrinsic_xfb_info(nir_shader *nir)
3100 {
3101 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3102 bool progress = false;
3103
3104 for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++)
3105 nir->info.xfb_stride[i] = nir->xfb_info->buffers[i].stride / 4;
3106
3107 nir_foreach_block(block, impl) {
3108 nir_foreach_instr_safe(instr, block) {
3109 if (instr->type != nir_instr_type_intrinsic)
3110 continue;
3111
3112 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3113
3114 if (!nir_intrinsic_has_io_xfb(intr))
3115 continue;
3116
3117 /* No indirect indexing allowed. The index is implied to be 0. */
3118 ASSERTED nir_src offset = *nir_get_io_offset_src(intr);
3119 assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
3120
3121 /* Calling this pass for the second time shouldn't do anything. */
3122 if (nir_intrinsic_io_xfb(intr).out[0].num_components ||
3123 nir_intrinsic_io_xfb(intr).out[1].num_components ||
3124 nir_intrinsic_io_xfb2(intr).out[0].num_components ||
3125 nir_intrinsic_io_xfb2(intr).out[1].num_components)
3126 continue;
3127
3128 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3129 unsigned writemask = nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr);
3130
3131 nir_io_xfb xfb[2];
3132 memset(xfb, 0, sizeof(xfb));
3133
3134 for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
3135 nir_xfb_output_info *out = &nir->xfb_info->outputs[i];
3136 if (out->location == sem.location) {
3137 unsigned xfb_mask = writemask & out->component_mask;
3138
3139 /*fprintf(stdout, "output%u: buffer=%u, offset=%u, location=%u, "
3140 "component_offset=%u, component_mask=0x%x, xfb_mask=0x%x, slots=%u\n",
3141 i, out->buffer,
3142 out->offset,
3143 out->location,
3144 out->component_offset,
3145 out->component_mask,
3146 xfb_mask, sem.num_slots);*/
3147
3148 while (xfb_mask) {
3149 int start, count;
3150 u_bit_scan_consecutive_range(&xfb_mask, &start, &count);
3151
3152 xfb[start / 2].out[start % 2].num_components = count;
3153 xfb[start / 2].out[start % 2].buffer = out->buffer;
3154 /* out->offset is relative to the first stored xfb component */
3155 /* start is relative to component 0 */
3156 xfb[start / 2].out[start % 2].offset =
3157 out->offset / 4 - out->component_offset + start;
3158
3159 progress = true;
3160 }
3161 }
3162 }
3163
3164 nir_intrinsic_set_io_xfb(intr, xfb[0]);
3165 nir_intrinsic_set_io_xfb2(intr, xfb[1]);
3166 }
3167 }
3168
3169 nir_metadata_preserve(impl, nir_metadata_all);
3170 return progress;
3171 }
3172
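/* Number of vec4 slots occupied by a variable of the given type, used as the
 * type_size callback for nir_lower_io below. For example, float and vec4 each
 * take one slot and a mat4 takes four. The bindless parameter is unused here.
 */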
3173 static int
3174 type_size_vec4(const struct glsl_type *type, bool bindless)
3175 {
3176 return glsl_count_attribute_slots(type, false);
3177 }
3178
3179 /**
3180 * This runs all compiler passes needed to lower IO, lower indirect IO access,
3181 * set transform feedback info in IO intrinsics, and clean up the IR.
3182 *
3183 * \param renumber_vs_inputs
3184 * Set to true to remove holes between VS inputs, which is safe in any
3185 * shader linker that can handle renumbered inputs. Set to false to keep
3186 * the holes, which is recommended for gallium drivers so as not to break
3187 * the mapping of vertex elements to VS inputs expected by gallium
3188 * frontends.
3189 */
3190 void
3191 nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
3192 {
3193 if (nir->info.stage == MESA_SHADER_COMPUTE)
3194 return;
3195
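/* support_indirect_inputs/outputs are per-stage bitmasks, so a driver could
 * set e.g. BITFIELD_BIT(MESA_SHADER_TESS_CTRL) to keep indirect indexing of
 * TCS inputs (illustrative example, not from this file).
 */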
3196 bool has_indirect_inputs =
3197 (nir->options->support_indirect_inputs >> nir->info.stage) & 0x1;
3198
3199 /* Transform feedback requires that indirect outputs are lowered. */
3200 bool has_indirect_outputs =
3201 (nir->options->support_indirect_outputs >> nir->info.stage) & 0x1 &&
3202 nir->xfb_info == NULL;
3203
3204 /* TODO: Sorting variables by location is required due to some bug
3205 * in nir_lower_io_to_temporaries. If variables are not sorted,
3206 * dEQP-GLES31.functional.separate_shader.random.0 fails.
3207 *
3208 * This isn't needed if nir_assign_io_var_locations is called because it
3209 * also sorts variables. However, if IO is lowered sooner than that, we
3210 * must sort explicitly here to get what nir_assign_io_var_locations does.
3211 */
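/* Sort only inter-stage varyings; VS inputs and FS outputs are excluded
 * from the sort.
 */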
3212 unsigned varying_var_mask =
3213 (nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0) |
3214 (nir->info.stage != MESA_SHADER_FRAGMENT ? nir_var_shader_out : 0);
3215 nir_sort_variables_by_location(nir, varying_var_mask);
3216
3217 if (!has_indirect_inputs || !has_indirect_outputs) {
3218 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
3219 nir_shader_get_entrypoint(nir), !has_indirect_outputs,
3220 !has_indirect_inputs);
3221
3222 /* We need to lower all the copy_deref instructions introduced by
3223 * nir_lower_io_to_temporaries before calling nir_lower_io.
3224 */
3225 NIR_PASS_V(nir, nir_split_var_copies);
3226 NIR_PASS_V(nir, nir_lower_var_copies);
3227 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
3228 }
3229
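/* Lower shader_in/shader_out variable access to IO intrinsics, counting
 * locations in vec4 slots and splitting 64-bit IO into 32-bit operations.
 */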
3230 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
3231 type_size_vec4, nir_lower_io_lower_64bit_to_32);
3232
3233 /* nir_io_add_const_offset_to_base needs actual constants. */
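/* Illustrative example (not from this file): a load_input with base = 0 and
 * a now-constant offset of 2 becomes base = 2 with a zero offset.
 */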
3234 NIR_PASS_V(nir, nir_opt_constant_folding);
3235 NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
3236
3237 /* Lower and remove dead derefs and variables to clean up the IR. */
3238 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3239 NIR_PASS_V(nir, nir_opt_dce);
3240 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3241
3242 /* If IO is lowered before var->data.driver_location is assigned, driver
3243 * locations are all 0, which means IO bases are all 0. It's not necessary
3244 * to set driver_location before lowering IO because the only thing that
3245 * identifies outputs is their semantic, and IO bases can always be
3246 * computed from the semantics.
3247 *
3248 * This assigns IO bases from scratch, using IO semantics to tell which
3249 * intrinsics refer to the same IO. If the bases already exist, they
3250 * will be reassigned, sorted by the semantic, and all holes removed.
3251 * This effectively canonicalizes all bases.
3252 *
3253 * This must be done after DCE to remove dead load_input intrinsics.
3254 */
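/* Illustrative example (not from this file): if the only outputs are
 * VARYING_SLOT_VAR0 and VARYING_SLOT_VAR3, their intrinsics get base 0 and
 * base 1 respectively.
 */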
3255 NIR_PASS_V(nir, nir_recompute_io_bases,
3256 (nir->info.stage != MESA_SHADER_VERTEX || renumber_vs_inputs ?
3257 nir_var_shader_in : 0) | nir_var_shader_out);
3258
3259 if (nir->xfb_info)
3260 NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
3261
3262 if (nir->options->lower_mediump_io)
3263 nir->options->lower_mediump_io(nir);
3264
3265 nir->info.io_lowered = true;
3266 }
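/* Usage sketch (illustrative, not part of this file): a gallium driver that
 * wants lowered IO intrinsics could run:
 *
 *    if (!nir->info.io_lowered)
 *       nir_lower_io_passes(nir, false);
 *
 * passing renumber_vs_inputs = false to preserve the vertex-element mapping
 * described in the comment above nir_lower_io_passes().
 */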
3267