/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass converts loads and stores of input/output variables
 * (accessed through derefs) into the corresponding input/output intrinsics.
 */
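
/*
 * Illustrative sketch (not exact NIR syntax): a fragment-shader input read
 * such as
 *
 *    %x = load_deref &color
 *
 * becomes, depending on the options and the interpolation mode, roughly
 *
 *    %bary = load_barycentric_pixel (interp_mode)
 *    %x    = load_interpolated_input (%bary, offset) (base, component, ...)
 *
 * or a plain load_input for flat and non-fragment inputs.
 */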

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "nir_xfb_info.h"

#include "util/u_math.h"

struct lower_io_state {
   void *dead_ctx;
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type, bool);
   nir_variable_mode modes;
   nir_lower_io_options options;
   struct set variable_names;
};

static const char *
add_variable_name(struct lower_io_state *state, const char *name)
{
   if (!name)
      return NULL;

   bool found = false;
   struct set_entry *entry = _mesa_set_search_or_add(&state->variable_names, name, &found);
   if (!found)
      entry->key = (void *)ralloc_strdup(state->builder.shader, name);
   return entry->key;
}

static nir_intrinsic_op
ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
   case nir_intrinsic_deref_atomic:
      return nir_intrinsic_ssbo_atomic;
   case nir_intrinsic_deref_atomic_swap:
      return nir_intrinsic_ssbo_atomic_swap;
   default:
      unreachable("Invalid SSBO atomic");
   }
}

static nir_intrinsic_op
global_atomic_for_deref(nir_address_format addr_format,
                        nir_intrinsic_op deref_op)
{
   switch (deref_op) {
   case nir_intrinsic_deref_atomic:
      if (addr_format != nir_address_format_2x32bit_global)
         return nir_intrinsic_global_atomic;
      else
         return nir_intrinsic_global_atomic_2x32;

   case nir_intrinsic_deref_atomic_swap:
      if (addr_format != nir_address_format_2x32bit_global)
         return nir_intrinsic_global_atomic_swap;
      else
         return nir_intrinsic_global_atomic_swap_2x32;

   default:
      unreachable("Invalid global atomic");
   }
}

static nir_intrinsic_op
shared_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
   case nir_intrinsic_deref_atomic:
      return nir_intrinsic_shared_atomic;
   case nir_intrinsic_deref_atomic_swap:
      return nir_intrinsic_shared_atomic_swap;
   default:
      unreachable("Invalid shared atomic");
   }
}

static nir_intrinsic_op
task_payload_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
   case nir_intrinsic_deref_atomic:
      return nir_intrinsic_task_payload_atomic;
   case nir_intrinsic_deref_atomic_swap:
      return nir_intrinsic_task_payload_atomic_swap;
   default:
      unreachable("Invalid task payload atomic");
   }
}

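/* Assign consecutive driver_location offsets to every variable of the given
 * mode, using the driver-provided type_size callback, and report the total
 * size consumed through *size.
 */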
void
nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
                         unsigned *size,
                         int (*type_size)(const struct glsl_type *, bool))
{
   unsigned location = 0;

   nir_foreach_variable_with_modes(var, shader, mode) {
      var->data.driver_location = location;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;
      location += type_size(var->type, bindless_type_size);
   }

   *size = location;
}

/**
 * Some inputs and outputs are arrayed, meaning that there is an extra level
 * of array indexing to handle mismatches between the shader interface and the
 * dispatch pattern of the shader.  For instance, geometry shaders are
 * executed per-primitive while their inputs and outputs are specified
 * per-vertex, so all inputs and outputs have to be additionally indexed with
 * the vertex index within the primitive.
 */
bool
nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (var->data.per_view) {
      /* Nested arrayed outputs (both per-view and per-{vertex,primitive}) are
       * unsupported.
       */
      assert(stage == MESA_SHADER_VERTEX);
      assert(var->data.mode == nir_var_shader_out);
      return true;
   }

   if (stage == MESA_SHADER_MESH) {
      /* NV_mesh_shader: this is a flat array for the whole workgroup. */
      if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
         return var->data.per_primitive;
   }

   if (var->data.mode == nir_var_shader_in) {
      if (var->data.per_vertex) {
         assert(stage == MESA_SHADER_FRAGMENT);
         return true;
      }

      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;
   }

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_MESH;

   return false;
}

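/* Whether a dual-slot 64-bit vertex-shader input is loaded with the
 * "high_dvec2" semantic (second half of the dual slot) when lowering 64-bit
 * I/O with nir_lower_io_lower_64bit_to_32_new.
 */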
static bool
uses_high_dvec2_semantic(struct lower_io_state *state,
                         const nir_variable *var)
{
   return state->builder.shader->info.stage == MESA_SHADER_VERTEX &&
          state->options & nir_lower_io_lower_64bit_to_32_new &&
          var->data.mode == nir_var_shader_in &&
          glsl_type_is_dual_slot(glsl_without_array(var->type));
}

static unsigned
get_number_of_slots(struct lower_io_state *state,
                    const nir_variable *var)
{
   const struct glsl_type *type = var->type;

   if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* NV_mesh_shader:
    * PRIMITIVE_INDICES is a flat array, not a proper arrayed output,
    * as opposed to D3D-style mesh shaders where it's addressed by
    * the primitive index.
    * Prevent assigning several slots to primitive indices,
    * to avoid some issues.
    */
   if (state->builder.shader->info.stage == MESA_SHADER_MESH &&
       var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
       !nir_is_arrayed_io(var, state->builder.shader->info.stage))
      return 1;

   return state->type_size(type, var->data.bindless) /
          (uses_high_dvec2_semantic(state, var) ? 2 : 1);
}

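/* Walk the deref chain and build an offset (in the units defined by the
 * type_size callback) from the variable's base to the accessed element.  For
 * arrayed I/O the outermost array index (vertex/primitive index) is returned
 * separately through array_index; compact variables with constant indices
 * additionally adjust *component.
 */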
static nir_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_def **array_index,
              int (*type_size)(const struct glsl_type *, bool),
              unsigned *component, bool bts)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
    * inputs), skip the outermost array index.  Process the rest normally.
    */
   if (array_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *array_index = (*p)->arr.index.ssa;
      p++;
   }

   if (path.path[0]->var->data.compact && nir_src_is_const((*p)->arr.index)) {
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         unsigned size = type_size((*p)->type, bts);

         nir_def *mul =
            nir_amul_imm(b, (*p)->arr.index.ssa, size);

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
         }
         offset = nir_iadd_imm(b, offset, field_offset);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}

static bool
is_medium_precision(const nir_shader *shader, const nir_variable *var)
{
   if (shader->options->io_options & nir_io_mediump_is_32bit)
      return false;

   return var->data.precision == GLSL_PRECISION_MEDIUM ||
          var->data.precision == GLSL_PRECISION_LOW;
}

static enum glsl_interp_mode
get_interp_mode(const nir_variable *var)
{
   unsigned interp_mode = var->data.interpolation;

   /* INTERP_MODE_NONE is an artifact of OpenGL.  Change it to SMOOTH
    * to enable CSE between load_barycentric_pixel(NONE->SMOOTH) and
    * load_barycentric_pixel(SMOOTH), which also enables IO vectorization when
    * one component originally had NONE and an adjacent component had SMOOTH.
    *
    * Color varyings must preserve NONE.  NONE for colors means that
    * glShadeModel determines the interpolation mode.
    */
   if (var->data.location != VARYING_SLOT_COL0 &&
       var->data.location != VARYING_SLOT_COL1 &&
       var->data.location != VARYING_SLOT_BFC0 &&
       var->data.location != VARYING_SLOT_BFC1 &&
       interp_mode == INTERP_MODE_NONE)
      return INTERP_MODE_SMOOTH;

   return interp_mode;
}

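/* Emit the load intrinsic for a lowered input/output/uniform access: the
 * intrinsic is chosen from the variable mode, stage and interpolation mode,
 * and the base, component, range and io_semantics indices are filled in.
 */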
static nir_def *
emit_load(struct lower_io_state *state,
          nir_def *array_index, nir_variable *var, nir_def *offset,
          unsigned component, unsigned num_components, unsigned bit_size,
          nir_alu_type dest_type, bool high_dvec2)
{
   nir_builder *b = &state->builder;
   const nir_shader *nir = b->shader;
   nir_variable_mode mode = var->data.mode;
   nir_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          state->options & nir_lower_io_use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT &&
          !var->data.per_primitive) {
         if (var->data.interpolation == INTERP_MODE_EXPLICIT ||
             var->data.per_vertex) {
            assert(array_index != NULL);
            op = nir_intrinsic_load_input_vertex;
         } else {
            assert(array_index == NULL);

            nir_intrinsic_op bary_op;
            if (var->data.sample)
               bary_op = nir_intrinsic_load_barycentric_sample;
            else if (var->data.centroid)
               bary_op = nir_intrinsic_load_barycentric_centroid;
            else
               bary_op = nir_intrinsic_load_barycentric_pixel;

            barycentric = nir_load_barycentric(&state->builder, bary_op,
                                               get_interp_mode(var));
            op = nir_intrinsic_load_interpolated_input;
         }
      } else {
         if (var->data.per_primitive)
            op = nir_intrinsic_load_per_primitive_input;
         else if (array_index)
            op = nir_intrinsic_load_per_vertex_input;
         else
            op = nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      if (!array_index)
         op = nir_intrinsic_load_output;
      else if (var->data.per_primitive)
         op = nir_intrinsic_load_per_primitive_output;
      else if (var->data.per_view)
         op = nir_intrinsic_load_per_view_output;
      else
         op = nir_intrinsic_load_per_vertex_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = num_components;
   load->name = add_variable_name(state, var->name);

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (nir_intrinsic_has_range(load)) {
      const struct glsl_type *type = var->type;
      if (array_index)
         type = glsl_get_array_element(type);
      unsigned var_size = state->type_size(type, var->data.bindless);
      nir_intrinsic_set_range(load, var_size);
   }

   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (nir_intrinsic_has_access(load))
      nir_intrinsic_set_access(load, var->data.access);

   nir_intrinsic_set_dest_type(load, dest_type);

   if (load->intrinsic != nir_intrinsic_load_uniform) {
      nir_io_semantics semantics = { 0 };
      semantics.location = var->data.location;
      semantics.num_slots = get_number_of_slots(state, var);
      semantics.fb_fetch_output = var->data.fb_fetch_output;
      if (semantics.fb_fetch_output) {
         semantics.fb_fetch_output_coherent =
            !!(var->data.access & ACCESS_COHERENT);
      }
      semantics.medium_precision = is_medium_precision(b->shader, var);
      semantics.high_dvec2 = high_dvec2;
      /* "per_vertex" is misnamed.  It means "explicit interpolation with
       * the original vertex order", which is a stricter version of
       * INTERP_MODE_EXPLICIT.
       */
      semantics.interp_explicit_strict = var->data.per_vertex;
      nir_intrinsic_set_io_semantics(load, semantics);
   }

   if (array_index) {
      load->src[0] = nir_src_for_ssa(array_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   nir_def_init(&load->instr, &load->def, num_components, bit_size);
   nir_builder_instr_insert(b, &load->instr);

   return &load->def;
}

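/* Lower a load_deref of an I/O variable.  64-bit loads are optionally split
 * into pairs of 32-bit loads and repacked with pack_64_2x32; 1-bit booleans
 * are loaded as 32-bit values and converted back with b2b1.
 */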
static nir_def *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_def *array_index, nir_variable *var, nir_def *offset,
           unsigned component, const struct glsl_type *type)
{
   const bool lower_double = !glsl_type_is_integer(type) &&
                             state->options & nir_lower_io_lower_64bit_float_to_32;
   if (intrin->def.bit_size == 64 &&
       (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32_new |
                                           nir_lower_io_lower_64bit_to_32)))) {
      nir_builder *b = &state->builder;
      bool use_high_dvec2_semantic = uses_high_dvec2_semantic(state, var);

      /* Each slot is a dual slot, so divide the offset within the variable
       * by 2.
       */
      if (use_high_dvec2_semantic)
         offset = nir_ushr_imm(b, offset, 1);

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      nir_def *comp64[4];
      assert(component == 0 || component == 2);
      unsigned dest_comp = 0;
      bool high_dvec2 = false;
      while (dest_comp < intrin->def.num_components) {
         const unsigned num_comps =
            MIN2(intrin->def.num_components - dest_comp,
                 (4 - component) / 2);

         nir_def *data32 =
            emit_load(state, array_index, var, offset, component,
                      num_comps * 2, 32, nir_type_uint32, high_dvec2);
         for (unsigned i = 0; i < num_comps; i++) {
            comp64[dest_comp + i] =
               nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
         }

         /* Only the first load has a component offset */
         component = 0;
         dest_comp += num_comps;

         if (use_high_dvec2_semantic) {
            /* Increment the offset when we wrap around the dual slot. */
            if (high_dvec2)
               offset = nir_iadd_imm(b, offset, slot_size);
            high_dvec2 = !high_dvec2;
         } else {
            offset = nir_iadd_imm(b, offset, slot_size);
         }
      }

      return nir_vec(b, comp64, intrin->def.num_components);
   } else if (intrin->def.bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      return nir_b2b1(&state->builder,
                      emit_load(state, array_index, var, offset, component,
                                intrin->def.num_components, 32,
                                nir_type_bool32, false));
   } else {
      return emit_load(state, array_index, var, offset, component,
                       intrin->def.num_components,
                       intrin->def.bit_size,
                       nir_get_nir_type_for_glsl_type(type), false);
   }
}

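/* Emit the store_output-style intrinsic for a lowered output store, including
 * write mask, component, GS stream packing and io_semantics.
 */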
static void
emit_store(struct lower_io_state *state, nir_def *data,
           nir_def *array_index, nir_variable *var, nir_def *offset,
           unsigned component, unsigned num_components,
           nir_component_mask_t write_mask, nir_alu_type src_type)
{
   nir_builder *b = &state->builder;

   assert(var->data.mode == nir_var_shader_out);
   nir_intrinsic_op op;
   if (!array_index)
      op = nir_intrinsic_store_output;
   else if (var->data.per_view)
      op = nir_intrinsic_store_per_view_output;
   else if (var->data.per_primitive)
      op = nir_intrinsic_store_per_primitive_output;
   else
      op = nir_intrinsic_store_per_vertex_output;

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = num_components;
   store->name = add_variable_name(state, var->name);

   store->src[0] = nir_src_for_ssa(data);

   const struct glsl_type *type = var->type;
   if (array_index)
      type = glsl_get_array_element(type);
   unsigned var_size = state->type_size(type, var->data.bindless);
   nir_intrinsic_set_base(store, var->data.driver_location);
   nir_intrinsic_set_range(store, var_size);
   nir_intrinsic_set_component(store, component);
   nir_intrinsic_set_src_type(store, src_type);

   nir_intrinsic_set_write_mask(store, write_mask);

   if (nir_intrinsic_has_access(store))
      nir_intrinsic_set_access(store, var->data.access);

   if (array_index)
      store->src[1] = nir_src_for_ssa(array_index);

   store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);

   unsigned gs_streams = 0;
   if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
      if (var->data.stream & NIR_STREAM_PACKED) {
         gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
      } else {
         assert(var->data.stream < 4);
         gs_streams = 0;
         for (unsigned i = 0; i < num_components; ++i)
            gs_streams |= var->data.stream << (2 * i);
      }
   }

   nir_io_semantics semantics = { 0 };
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.dual_source_blend_index = var->data.index;
   semantics.gs_streams = gs_streams;
   semantics.medium_precision = is_medium_precision(b->shader, var);
   semantics.per_view = var->data.per_view;
   semantics.invariant = var->data.invariant;

   nir_intrinsic_set_io_semantics(store, semantics);

   nir_builder_instr_insert(b, &store->instr);
}

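/* Lower a store_deref to an output variable.  64-bit stores are optionally
 * split into 32-bit stores with an expanded write mask; 1-bit booleans are
 * widened to 32-bit with b2b32 before being stored.
 */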
static void
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_def *array_index, nir_variable *var, nir_def *offset,
            unsigned component, const struct glsl_type *type)
{
   const bool lower_double = !glsl_type_is_integer(type) &&
                             state->options & nir_lower_io_lower_64bit_float_to_32;
   if (intrin->src[1].ssa->bit_size == 64 &&
       (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32 |
                                           nir_lower_io_lower_64bit_to_32_new)))) {
      nir_builder *b = &state->builder;

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      assert(component == 0 || component == 2);
      unsigned src_comp = 0;
      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
      while (src_comp < intrin->num_components) {
         const unsigned num_comps =
            MIN2(intrin->num_components - src_comp,
                 (4 - component) / 2);

         if (write_mask & BITFIELD_MASK(num_comps)) {
            nir_def *data =
               nir_channels(b, intrin->src[1].ssa,
                            BITFIELD_RANGE(src_comp, num_comps));
            nir_def *data32 = nir_bitcast_vector(b, data, 32);

            uint32_t write_mask32 = 0;
            for (unsigned i = 0; i < num_comps; i++) {
               if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
                  write_mask32 |= 3 << (i * 2);
            }

            emit_store(state, data32, array_index, var, offset,
                       component, data32->num_components, write_mask32,
                       nir_type_uint32);
         }

         /* Only the first store has a component offset */
         component = 0;
         src_comp += num_comps;
         write_mask >>= num_comps;
         offset = nir_iadd_imm(b, offset, slot_size);
      }
   } else if (intrin->src[1].ssa->bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      nir_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
      emit_store(state, b32_val, array_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_type_bool32);
   } else {
      emit_store(state, intrin->src[1].ssa, array_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_get_nir_type_for_glsl_type(type));
   }
}

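/* Lower the interp_deref_at_* intrinsics.  Flat and explicit inputs fall back
 * to a regular lowered load; everything else becomes load_interpolated_input
 * fed by the matching load_barycentric_at_* intrinsic.
 */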
static nir_def *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_variable *var, nir_def *offset, unsigned component,
                     const struct glsl_type *type)
{
   nir_builder *b = &state->builder;
   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat.  Lower
    * interpolateAtVertex() for explicit variables.
    */
   if (var->data.interpolation == INTERP_MODE_FLAT ||
       var->data.interpolation == INTERP_MODE_EXPLICIT) {
      nir_def *vertex_index = NULL;

      if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
         assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
         vertex_index = intrin->src[1].ssa;
      }

      return lower_load(intrin, state, vertex_index, var, offset, component, type);
   }

   /* None of the supported APIs allow interpolation on 64-bit things */
   assert(intrin->def.bit_size <= 32);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      bary_op = nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_def_init(&bary_setup->instr, &bary_setup->def, 2, 32);
   nir_intrinsic_set_interp_mode(bary_setup, get_interp_mode(var));

   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
      bary_setup->src[0] = nir_src_for_ssa(intrin->src[1].ssa);

   nir_builder_instr_insert(b, &bary_setup->instr);

   nir_io_semantics semantics = { 0 };
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.medium_precision = is_medium_precision(b->shader, var);

   nir_def *load =
      nir_load_interpolated_input(&state->builder,
                                  intrin->def.num_components,
                                  intrin->def.bit_size,
                                  &bary_setup->def,
                                  offset,
                                  .base = var->data.driver_location,
                                  .component = component,
                                  .io_semantics = semantics);

   return load;
}

/**
 * Convert a compact view index emitted by nir_lower_multiview to an absolute
 * view index.
 */
static nir_def *
uncompact_view_index(nir_builder *b, nir_src compact_index_src)
{
   /* We require nir_lower_io_to_temporaries when using absolute view indices,
    * which ensures the index is constant.
    */
   assert(nir_src_is_const(compact_index_src));
   unsigned compact_index = nir_src_as_uint(compact_index_src);

   unsigned view_index;
   uint32_t view_mask = b->shader->info.view_mask;
   for (unsigned i = 0; i <= compact_index; i++) {
      view_index = u_bit_scan(&view_mask);
   }

   return nir_imm_int(b, view_index);
}

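/* Lower all deref-based I/O access in a single block.  Returns true if any
 * instruction was replaced.
 */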
static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
         /* We can lower the I/O for this NIR intrinsic */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         /* We can optionally lower these to load_interpolated_input */
         if (state->options & nir_lower_io_use_interpolated_input_intrinsics ||
             options->lower_interpolate_at)
            break;
         FALLTHROUGH;
      default:
         /* We can't lower the I/O for this NIR intrinsic, so skip it */
         continue;
      }

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      if (!nir_deref_mode_is_one_of(deref, state->modes))
         continue;

      nir_variable *var = nir_deref_instr_get_variable(deref);

      b->cursor = nir_before_instr(instr);

      const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);

      nir_def *offset;
      nir_def *array_index = NULL;
      unsigned component_offset = var->data.location_frac;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;

      if (nir_deref_instr_is_known_out_of_bounds(deref)) {
         /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
          *
          *    In the subsections described above for array, vector, matrix and
          *    structure accesses, any out-of-bounds access produced undefined
          *    behavior....
          *    Out-of-bounds reads return undefined values, which
          *    include values from other variables of the active program or zero.
          *    Out-of-bounds writes may be discarded or overwrite
          *    other variables of the active program.
          *
          * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
          * for reads.
          *
          * Otherwise get_io_offset would return an out-of-bounds offset, which
          * may result in out-of-bounds loading/storing of inputs/outputs
          * that could cause issues in drivers down the line.
          */
         if (intrin->intrinsic != nir_intrinsic_store_deref) {
            nir_def *zero =
               nir_imm_zero(b, intrin->def.num_components,
                            intrin->def.bit_size);
            nir_def_rewrite_uses(&intrin->def,
                                 zero);
         }

         nir_instr_remove(&intrin->instr);
         progress = true;
         continue;
      }

      offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
                             state->type_size, &component_offset,
                             bindless_type_size);

      if (!options->compact_view_index && array_index && var->data.per_view)
         array_index = uncompact_view_index(b, nir_src_for_ssa(array_index));

      nir_def *replacement = NULL;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
         replacement = lower_load(intrin, state, array_index, var, offset,
                                  component_offset, deref->type);
         break;

      case nir_intrinsic_store_deref:
         lower_store(intrin, state, array_index, var, offset,
                     component_offset, deref->type);
         break;

      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         assert(array_index == NULL);
         replacement = lower_interpolate_at(intrin, state, var, offset,
                                            component_offset, deref->type);
         break;

      default:
         continue;
      }

      if (replacement) {
         nir_def_rewrite_uses(&intrin->def,
                              replacement);
      }
      nir_instr_remove(&intrin->instr);
      progress = true;
   }

   return progress;
}

static bool
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *, bool),
                  nir_lower_io_options options)
{
   struct lower_io_state state;
   bool progress = false;

   state.builder = nir_builder_create(impl);
   state.dead_ctx = ralloc_context(NULL);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;
   _mesa_set_init(&state.variable_names, state.dead_ctx,
                  _mesa_hash_string, _mesa_key_string_equal);

   ASSERTED nir_variable_mode supported_modes =
      nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
   assert(!(modes & ~supported_modes));

   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   ralloc_free(state.dead_ctx);

   nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
 *
 * This pass is intended to be used for cross-stage shader I/O and driver-
 * managed uniforms to turn deref-based access into a simpler model using
 * locations or offsets.  For fragment shader inputs, it can optionally turn
 * load_deref into an explicit interpolation using barycentrics coming from
 * one of the load_barycentric_* intrinsics.  This pass requires that all
 * deref chains are complete and contain no casts.
 */
bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *, bool),
             nir_lower_io_options options)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      progress |= nir_lower_io_impl(impl, modes, type_size, options);
   }

   return progress;
}

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

nir_def *
nir_build_addr_iadd(nir_builder *b, nir_def *addr,
                    nir_address_format addr_format,
                    nir_variable_mode modes,
                    nir_def *offset)
{
   assert(offset->num_components == 1);

   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_32bit_offset:
      assert(addr->bit_size == offset->bit_size);
      assert(addr->num_components == 1);
      return nir_iadd(b, addr, offset);

   case nir_address_format_2x32bit_global: {
      assert(addr->num_components == 2);
      nir_def *lo = nir_channel(b, addr, 0);
      nir_def *hi = nir_channel(b, addr, 1);
      nir_def *res_lo = nir_iadd(b, lo, offset);
      nir_def *carry = nir_b2i32(b, nir_ult(b, res_lo, lo));
      nir_def *res_hi = nir_iadd(b, hi, carry);
      return nir_vec2(b, res_lo, res_hi);
   }

   case nir_address_format_32bit_offset_as_64bit:
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      assert(addr->bit_size == offset->bit_size);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);

   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      assert(addr->bit_size == offset->bit_size);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);

   case nir_address_format_32bit_index_offset_pack64:
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_pack_64_2x32_split(b,
                                    nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
                                    nir_unpack_64_2x32_split_y(b, addr));

   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      assert(offset->bit_size == 32);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);

   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      assert(offset->bit_size == 64);
      if (!(modes & ~(nir_var_function_temp |
                      nir_var_shader_temp |
                      nir_var_mem_shared))) {
         /* If we're sure it's one of these modes, we can do an easy 32-bit
          * addition and don't need to bother with 64-bit math.
          */
         nir_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
         nir_def *type = nir_unpack_64_2x32_split_y(b, addr);
         addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
         return nir_pack_64_2x32_split(b, addr32, type);
      } else {
         return nir_iadd(b, addr, offset);
      }

   case nir_address_format_logical:
      unreachable("Unsupported address format");
   }
   unreachable("Invalid address format");
}

static unsigned
addr_get_offset_bit_size(nir_def *addr, nir_address_format addr_format)
{
   if (addr_format == nir_address_format_32bit_offset_as_64bit ||
       addr_format == nir_address_format_32bit_index_offset_pack64)
      return 32;
   return addr->bit_size;
}

nir_def *
nir_build_addr_iadd_imm(nir_builder *b, nir_def *addr,
                        nir_address_format addr_format,
                        nir_variable_mode modes,
                        int64_t offset)
{
   if (!offset)
      return addr;

   return nir_build_addr_iadd(
      b, addr, addr_format, modes,
      nir_imm_intN_t(b, offset,
                     addr_get_offset_bit_size(addr, addr_format)));
}

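/* Build the base address of a variable in the given address format: either a
 * scratch/shared/constant/global base pointer plus the variable's
 * driver_location, or an immediate offset for offset-based formats.
 */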
static nir_def *
build_addr_for_var(nir_builder *b, nir_variable *var,
                   nir_address_format addr_format)
{
   assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
                            nir_var_mem_task_payload |
                            nir_var_mem_global |
                            nir_var_shader_temp | nir_var_function_temp |
                            nir_var_mem_push_const | nir_var_mem_constant));

   const unsigned num_comps = nir_address_format_num_components(addr_format);
   const unsigned bit_size = nir_address_format_bit_size(addr_format);

   switch (addr_format) {
   case nir_address_format_2x32bit_global:
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global: {
      nir_def *base_addr;
      switch (var->data.mode) {
      case nir_var_shader_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
         break;

      case nir_var_function_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
         break;

      case nir_var_mem_constant:
         base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
         break;

      case nir_var_mem_shared:
         base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
         break;

      case nir_var_mem_global:
         base_addr = nir_load_global_base_ptr(b, num_comps, bit_size);
         break;

      default:
         unreachable("Unsupported variable mode");
      }

      return nir_build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
                                     var->data.driver_location);
   }

   case nir_address_format_32bit_offset:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int(b, var->data.driver_location);

   case nir_address_format_32bit_offset_as_64bit:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int64(b, var->data.driver_location);

   case nir_address_format_62bit_generic:
      switch (var->data.mode) {
      case nir_var_shader_temp:
      case nir_var_function_temp:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);

      case nir_var_mem_shared:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);

      case nir_var_mem_global:
         return nir_iadd_imm(b, nir_load_global_base_ptr(b, num_comps, bit_size),
                             var->data.driver_location);

      default:
         unreachable("Unsupported variable mode");
      }

   default:
      unreachable("Unsupported address format");
   }
}

static nir_def *
build_runtime_addr_mode_check(nir_builder *b, nir_def *addr,
                              nir_address_format addr_format,
                              nir_variable_mode mode)
{
   /* The compile-time check failed; do a run-time check */
   switch (addr_format) {
   case nir_address_format_62bit_generic: {
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      nir_def *mode_enum = nir_ushr_imm(b, addr, 62);
      switch (mode) {
      case nir_var_function_temp:
      case nir_var_shader_temp:
         return nir_ieq_imm(b, mode_enum, 0x2);

      case nir_var_mem_shared:
         return nir_ieq_imm(b, mode_enum, 0x1);

      case nir_var_mem_global:
         return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
                        nir_ieq_imm(b, mode_enum, 0x3));

      default:
         unreachable("Invalid mode check intrinsic");
      }
   }

   default:
      unreachable("Unsupported address mode");
   }
}

unsigned
nir_address_format_bit_size(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
      return 32;
   case nir_address_format_2x32bit_global:
      return 32;
   case nir_address_format_64bit_global:
      return 64;
   case nir_address_format_64bit_global_32bit_offset:
      return 32;
   case nir_address_format_64bit_bounded_global:
      return 32;
   case nir_address_format_32bit_index_offset:
      return 32;
   case nir_address_format_32bit_index_offset_pack64:
      return 64;
   case nir_address_format_vec2_index_32bit_offset:
      return 32;
   case nir_address_format_62bit_generic:
      return 64;
   case nir_address_format_32bit_offset:
      return 32;
   case nir_address_format_32bit_offset_as_64bit:
      return 64;
   case nir_address_format_logical:
      return 32;
   }
   unreachable("Invalid address format");
}

unsigned
nir_address_format_num_components(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
      return 1;
   case nir_address_format_2x32bit_global:
      return 2;
   case nir_address_format_64bit_global:
      return 1;
   case nir_address_format_64bit_global_32bit_offset:
      return 4;
   case nir_address_format_64bit_bounded_global:
      return 4;
   case nir_address_format_32bit_index_offset:
      return 2;
   case nir_address_format_32bit_index_offset_pack64:
      return 1;
   case nir_address_format_vec2_index_32bit_offset:
      return 3;
   case nir_address_format_62bit_generic:
      return 1;
   case nir_address_format_32bit_offset:
      return 1;
   case nir_address_format_32bit_offset_as_64bit:
      return 1;
   case nir_address_format_logical:
      return 1;
   }
   unreachable("Invalid address format");
}

static nir_def *
addr_to_index(nir_builder *b, nir_def *addr,
              nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 0);
   case nir_address_format_32bit_index_offset_pack64:
      return nir_unpack_64_2x32_split_y(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      return nir_trim_vector(b, addr, 2);
   default:
      unreachable("Invalid address format");
   }
}

static nir_def *
addr_to_offset(nir_builder *b, nir_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 1);
   case nir_address_format_32bit_index_offset_pack64:
      return nir_unpack_64_2x32_split_x(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      return nir_channel(b, addr, 2);
   case nir_address_format_32bit_offset:
      return addr;
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_62bit_generic:
      return nir_u2u32(b, addr);
   default:
      unreachable("Invalid address format");
   }
}

/** Returns true if the given address format resolves to a global address */
static bool
addr_format_is_global(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode == nir_var_mem_global;

   return addr_format == nir_address_format_32bit_global ||
          addr_format == nir_address_format_2x32bit_global ||
          addr_format == nir_address_format_64bit_global ||
          addr_format == nir_address_format_64bit_global_32bit_offset ||
          addr_format == nir_address_format_64bit_bounded_global;
}

static bool
addr_format_is_offset(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode != nir_var_mem_global;

   return addr_format == nir_address_format_32bit_offset ||
          addr_format == nir_address_format_32bit_offset_as_64bit;
}

static nir_def *
addr_to_global(nir_builder *b, nir_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      return addr;

   case nir_address_format_2x32bit_global:
      assert(addr->num_components == 2);
      return addr;

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      return nir_iadd(b, nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)),
                      nir_u2u64(b, nir_channel(b, addr, 3)));

   case nir_address_format_32bit_index_offset:
   case nir_address_format_32bit_index_offset_pack64:
   case nir_address_format_vec2_index_32bit_offset:
   case nir_address_format_32bit_offset:
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_logical:
      unreachable("Cannot get a 64-bit address with this address format");
   }

   unreachable("Invalid address format");
}

static bool
addr_format_needs_bounds_check(nir_address_format addr_format)
{
   return addr_format == nir_address_format_64bit_bounded_global;
}

static nir_def *
addr_is_in_bounds(nir_builder *b, nir_def *addr,
                  nir_address_format addr_format, unsigned size)
{
   assert(addr_format == nir_address_format_64bit_bounded_global);
   assert(addr->num_components == 4);
   assert(size > 0);
   return nir_ult(b, nir_iadd_imm(b, nir_channel(b, addr, 3), size - 1),
                  nir_channel(b, addr, 2));
}

static void
nir_get_explicit_deref_range(nir_deref_instr *deref,
                             nir_address_format addr_format,
                             uint32_t *out_base,
                             uint32_t *out_range)
{
   uint32_t base = 0;
   uint32_t range = glsl_get_explicit_size(deref->type, false);

   while (true) {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);

      switch (deref->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_array_wildcard:
      case nir_deref_type_ptr_as_array: {
         const unsigned stride = nir_deref_instr_array_stride(deref);
         if (stride == 0)
            goto fail;

         if (!parent)
            goto fail;

         if (deref->deref_type != nir_deref_type_array_wildcard &&
             nir_src_is_const(deref->arr.index)) {
            base += stride * nir_src_as_uint(deref->arr.index);
         } else {
            if (glsl_get_length(parent->type) == 0)
               goto fail;
            range += stride * (glsl_get_length(parent->type) - 1);
         }
         break;
      }

      case nir_deref_type_struct: {
         if (!parent)
            goto fail;

         base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
         break;
      }

      case nir_deref_type_cast: {
         nir_instr *parent_instr = deref->parent.ssa->parent_instr;

         switch (parent_instr->type) {
         case nir_instr_type_load_const: {
            nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);

            switch (addr_format) {
            case nir_address_format_32bit_offset:
               base += load->value[1].u32;
               break;
            case nir_address_format_32bit_index_offset:
               base += load->value[1].u32;
               break;
            case nir_address_format_vec2_index_32bit_offset:
               base += load->value[2].u32;
               break;
            default:
               goto fail;
            }

            *out_base = base;
            *out_range = range;
            return;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
            switch (intr->intrinsic) {
            case nir_intrinsic_load_vulkan_descriptor:
               /* Assume that a load_vulkan_descriptor won't contribute to an
                * offset within the resource.
                */
               break;
            default:
               goto fail;
            }

            *out_base = base;
            *out_range = range;
            return;
         }

         default:
            goto fail;
         }
      }

      default:
         goto fail;
      }

      deref = parent;
   }

fail:
   *out_base = 0;
   *out_range = ~0;
}

static nir_variable_mode
canonicalize_generic_modes(nir_variable_mode modes)
{
   assert(modes != 0);
   if (util_bitcount(modes) == 1)
      return modes;

   assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
                      nir_var_mem_shared | nir_var_mem_global)));

   /* Canonicalize by converting shader_temp to function_temp */
   if (modes & nir_var_shader_temp) {
      modes &= ~nir_var_shader_temp;
      modes |= nir_var_function_temp;
   }

   return modes;
}

static nir_intrinsic_op
get_store_global_op_from_addr_format(nir_address_format addr_format)
{
   if (addr_format != nir_address_format_2x32bit_global)
      return nir_intrinsic_store_global;
   else
      return nir_intrinsic_store_global_2x32;
}

static nir_intrinsic_op
get_load_global_op_from_addr_format(nir_address_format addr_format)
{
   if (addr_format != nir_address_format_2x32bit_global)
      return nir_intrinsic_load_global;
   else
      return nir_intrinsic_load_global_2x32;
}

static nir_intrinsic_op
get_load_global_constant_op_from_addr_format(nir_address_format addr_format)
{
   if (addr_format != nir_address_format_2x32bit_global)
      return nir_intrinsic_load_global_constant;
   else
      return nir_intrinsic_load_global_2x32; /* no dedicated op, fallback */
}

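/* Build the explicit-I/O load for a load_deref (or load_deref_block_intel).
 * When several generic modes are possible, a run-time mode check selects
 * between per-mode loads; bounded-global formats additionally get an
 * out-of-bounds check that makes the load return zero.
 */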
1458 static nir_def *
build_explicit_io_load(nir_builder * b,nir_intrinsic_instr * intrin,nir_def * addr,nir_address_format addr_format,nir_variable_mode modes,uint32_t align_mul,uint32_t align_offset,unsigned num_components)1459 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
1460 nir_def *addr, nir_address_format addr_format,
1461 nir_variable_mode modes,
1462 uint32_t align_mul, uint32_t align_offset,
1463 unsigned num_components)
1464 {
1465 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1466 modes = canonicalize_generic_modes(modes);
1467
1468 if (util_bitcount(modes) > 1) {
1469 if (addr_format_is_global(addr_format, modes)) {
1470 return build_explicit_io_load(b, intrin, addr, addr_format,
1471 nir_var_mem_global,
1472 align_mul, align_offset,
1473 num_components);
1474 } else if (modes & nir_var_function_temp) {
1475 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1476 nir_var_function_temp));
1477 nir_def *res1 =
1478 build_explicit_io_load(b, intrin, addr, addr_format,
1479 nir_var_function_temp,
1480 align_mul, align_offset,
1481 num_components);
1482 nir_push_else(b, NULL);
1483 nir_def *res2 =
1484 build_explicit_io_load(b, intrin, addr, addr_format,
1485 modes & ~nir_var_function_temp,
1486 align_mul, align_offset,
1487 num_components);
1488 nir_pop_if(b, NULL);
1489 return nir_if_phi(b, res1, res2);
1490 } else {
1491 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1492 nir_var_mem_shared));
1493 assert(modes & nir_var_mem_shared);
1494 nir_def *res1 =
1495 build_explicit_io_load(b, intrin, addr, addr_format,
1496 nir_var_mem_shared,
1497 align_mul, align_offset,
1498 num_components);
1499 nir_push_else(b, NULL);
1500 assert(modes & nir_var_mem_global);
1501 nir_def *res2 =
1502 build_explicit_io_load(b, intrin, addr, addr_format,
1503 nir_var_mem_global,
1504 align_mul, align_offset,
1505 num_components);
1506 nir_pop_if(b, NULL);
1507 return nir_if_phi(b, res1, res2);
1508 }
1509 }
1510
1511 assert(util_bitcount(modes) == 1);
1512 const nir_variable_mode mode = modes;
1513
1514 nir_intrinsic_op op;
1515 switch (intrin->intrinsic) {
1516 case nir_intrinsic_load_deref:
1517 switch (mode) {
1518 case nir_var_mem_ubo:
1519 if (addr_format == nir_address_format_64bit_global_32bit_offset)
1520 op = nir_intrinsic_load_global_constant_offset;
1521 else if (addr_format == nir_address_format_64bit_bounded_global)
1522 op = nir_intrinsic_load_global_constant_bounded;
1523 else if (addr_format_is_global(addr_format, mode))
1524 op = nir_intrinsic_load_global_constant;
1525 else
1526 op = nir_intrinsic_load_ubo;
1527 break;
1528 case nir_var_mem_ssbo:
1529 if (addr_format_is_global(addr_format, mode))
1530 op = nir_intrinsic_load_global;
1531 else
1532 op = nir_intrinsic_load_ssbo;
1533 break;
1534 case nir_var_mem_global:
1535 assert(addr_format_is_global(addr_format, mode));
1536
1537 if (nir_intrinsic_has_access(intrin) &&
1538 (nir_intrinsic_access(intrin) & ACCESS_CAN_REORDER))
1539 op = get_load_global_constant_op_from_addr_format(addr_format);
1540 else
1541 op = get_load_global_op_from_addr_format(addr_format);
1542 break;
1543 case nir_var_uniform:
1544 assert(addr_format_is_offset(addr_format, mode));
1545 assert(b->shader->info.stage == MESA_SHADER_KERNEL);
1546 op = nir_intrinsic_load_kernel_input;
1547 break;
1548 case nir_var_mem_shared:
1549 assert(addr_format_is_offset(addr_format, mode));
1550 op = nir_intrinsic_load_shared;
1551 break;
1552 case nir_var_mem_task_payload:
1553 assert(addr_format_is_offset(addr_format, mode));
1554 op = nir_intrinsic_load_task_payload;
1555 break;
1556 case nir_var_shader_temp:
1557 case nir_var_function_temp:
1558 if (addr_format_is_offset(addr_format, mode)) {
1559 op = nir_intrinsic_load_scratch;
1560 } else {
1561 assert(addr_format_is_global(addr_format, mode));
1562 op = get_load_global_op_from_addr_format(addr_format);
1563 }
1564 break;
1565 case nir_var_mem_push_const:
1566 assert(addr_format == nir_address_format_32bit_offset);
1567 op = nir_intrinsic_load_push_constant;
1568 break;
1569 case nir_var_mem_constant:
1570 if (addr_format_is_offset(addr_format, mode)) {
1571 op = nir_intrinsic_load_constant;
1572 } else {
1573 assert(addr_format_is_global(addr_format, mode));
1574 op = get_load_global_constant_op_from_addr_format(addr_format);
1575 }
1576 break;
1577 default:
1578 unreachable("Unsupported explicit IO variable mode");
1579 }
1580 break;
1581
1582 case nir_intrinsic_load_deref_block_intel:
1583 switch (mode) {
1584 case nir_var_mem_ssbo:
1585 if (addr_format_is_global(addr_format, mode))
1586 op = nir_intrinsic_load_global_block_intel;
1587 else
1588 op = nir_intrinsic_load_ssbo_block_intel;
1589 break;
1590 case nir_var_mem_global:
1591 op = nir_intrinsic_load_global_block_intel;
1592 break;
1593 case nir_var_mem_shared:
1594 op = nir_intrinsic_load_shared_block_intel;
1595 break;
1596 default:
1597 unreachable("Unsupported explicit IO variable mode");
1598 }
1599 break;
1600
1601 default:
1602 unreachable("Invalid intrinsic");
1603 }
1604
1605 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
1606
1607 if (op == nir_intrinsic_load_global_constant_offset) {
1608 assert(addr_format == nir_address_format_64bit_global_32bit_offset);
1609 load->src[0] = nir_src_for_ssa(
1610 nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
1611 load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1612 } else if (op == nir_intrinsic_load_global_constant_bounded) {
1613 assert(addr_format == nir_address_format_64bit_bounded_global);
1614 load->src[0] = nir_src_for_ssa(
1615 nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
1616 load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1617 load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
1618 } else if (addr_format_is_global(addr_format, mode)) {
1619 load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1620 } else if (addr_format_is_offset(addr_format, mode)) {
1621 assert(addr->num_components == 1);
1622 load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1623 } else {
1624 load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1625 load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1626 }
1627
1628 if (nir_intrinsic_has_access(load))
1629 nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
1630
1631 if (op == nir_intrinsic_load_constant) {
1632 nir_intrinsic_set_base(load, 0);
1633 nir_intrinsic_set_range(load, b->shader->constant_data_size);
1634 } else if (op == nir_intrinsic_load_kernel_input) {
1635 nir_intrinsic_set_base(load, 0);
1636 nir_intrinsic_set_range(load, b->shader->num_uniforms);
1637 } else if (mode == nir_var_mem_push_const) {
1638 /* Push constants are required to be chaseable back to the
1639 * variable so that we can provide a base/range.
1640 */
1641 nir_variable *var = nir_deref_instr_get_variable(deref);
1642 nir_intrinsic_set_base(load, 0);
1643 nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
1644 }
1645
1646 unsigned bit_size = intrin->def.bit_size;
1647 if (bit_size == 1) {
1648 /* TODO: Make the native bool bit_size an option. */
1649 bit_size = 32;
1650 }
1651
1652 if (nir_intrinsic_has_align(load))
1653 nir_intrinsic_set_align(load, align_mul, align_offset);
1654
1655 if (nir_intrinsic_has_range_base(load)) {
1656 unsigned base, range;
1657 nir_get_explicit_deref_range(deref, addr_format, &base, &range);
1658 nir_intrinsic_set_range_base(load, base);
1659 nir_intrinsic_set_range(load, range);
1660 }
1661
1662 load->num_components = num_components;
1663 nir_def_init(&load->instr, &load->def, num_components, bit_size);
1664
1665 assert(bit_size % 8 == 0);
1666
1667 nir_def *result;
1668 if (addr_format_needs_bounds_check(addr_format) &&
1669 op != nir_intrinsic_load_global_constant_bounded) {
1670 /* We don't need to bounds-check global_constant_bounded because bounds
1671 * checking is handled by the intrinsic itself.
1672 *
1673 * The Vulkan spec for robustBufferAccess gives us quite a few options
1674 * as to what we can do with an OOB read. Unfortunately, returning
1675 * undefined values isn't one of them, so we return an actual zero.
1676 */
1677 nir_def *zero = nir_imm_zero(b, load->num_components, bit_size);
1678
1679 /* TODO: Better handle block_intel. */
1680 assert(load->num_components == 1);
1681 const unsigned load_size = bit_size / 8;
1682 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
1683
1684 nir_builder_instr_insert(b, &load->instr);
1685
1686 nir_pop_if(b, NULL);
1687
1688 result = nir_if_phi(b, &load->def, zero);
1689 } else {
1690 nir_builder_instr_insert(b, &load->instr);
1691 result = &load->def;
1692 }
1693
1694 if (intrin->def.bit_size == 1) {
1695 /* For shared, we can go ahead and use NIR's and/or the back-end's
1696 * standard encoding for booleans rather than forcing a 0/1 boolean.
1697 * This should save an instruction or two.
1698 */
1699 if (mode == nir_var_mem_shared ||
1700 mode == nir_var_shader_temp ||
1701 mode == nir_var_function_temp)
1702 result = nir_b2b1(b, result);
1703 else
1704 result = nir_i2b(b, result);
1705 }
1706
1707 return result;
1708 }
1709
1710 static void
1711 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
1712 nir_def *addr, nir_address_format addr_format,
1713 nir_variable_mode modes,
1714 uint32_t align_mul, uint32_t align_offset,
1715 nir_def *value, nir_component_mask_t write_mask)
1716 {
1717 modes = canonicalize_generic_modes(modes);
1718
1719 if (util_bitcount(modes) > 1) {
1720 if (addr_format_is_global(addr_format, modes)) {
1721 build_explicit_io_store(b, intrin, addr, addr_format,
1722 nir_var_mem_global,
1723 align_mul, align_offset,
1724 value, write_mask);
1725 } else if (modes & nir_var_function_temp) {
1726 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1727 nir_var_function_temp));
1728 build_explicit_io_store(b, intrin, addr, addr_format,
1729 nir_var_function_temp,
1730 align_mul, align_offset,
1731 value, write_mask);
1732 nir_push_else(b, NULL);
1733 build_explicit_io_store(b, intrin, addr, addr_format,
1734 modes & ~nir_var_function_temp,
1735 align_mul, align_offset,
1736 value, write_mask);
1737 nir_pop_if(b, NULL);
1738 } else {
1739 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1740 nir_var_mem_shared));
1741 assert(modes & nir_var_mem_shared);
1742 build_explicit_io_store(b, intrin, addr, addr_format,
1743 nir_var_mem_shared,
1744 align_mul, align_offset,
1745 value, write_mask);
1746 nir_push_else(b, NULL);
1747 assert(modes & nir_var_mem_global);
1748 build_explicit_io_store(b, intrin, addr, addr_format,
1749 nir_var_mem_global,
1750 align_mul, align_offset,
1751 value, write_mask);
1752 nir_pop_if(b, NULL);
1753 }
1754 return;
1755 }
1756
1757 assert(util_bitcount(modes) == 1);
1758 const nir_variable_mode mode = modes;
1759
1760 nir_intrinsic_op op;
1761 switch (intrin->intrinsic) {
1762 case nir_intrinsic_store_deref:
1763 assert(write_mask != 0);
1764
1765 switch (mode) {
1766 case nir_var_mem_ssbo:
1767 if (addr_format_is_global(addr_format, mode))
1768 op = get_store_global_op_from_addr_format(addr_format);
1769 else
1770 op = nir_intrinsic_store_ssbo;
1771 break;
1772 case nir_var_mem_global:
1773 assert(addr_format_is_global(addr_format, mode));
1774 op = get_store_global_op_from_addr_format(addr_format);
1775 break;
1776 case nir_var_mem_shared:
1777 assert(addr_format_is_offset(addr_format, mode));
1778 op = nir_intrinsic_store_shared;
1779 break;
1780 case nir_var_mem_task_payload:
1781 assert(addr_format_is_offset(addr_format, mode));
1782 op = nir_intrinsic_store_task_payload;
1783 break;
1784 case nir_var_shader_temp:
1785 case nir_var_function_temp:
1786 if (addr_format_is_offset(addr_format, mode)) {
1787 op = nir_intrinsic_store_scratch;
1788 } else {
1789 assert(addr_format_is_global(addr_format, mode));
1790 op = get_store_global_op_from_addr_format(addr_format);
1791 }
1792 break;
1793 default:
1794 unreachable("Unsupported explicit IO variable mode");
1795 }
1796 break;
1797
1798 case nir_intrinsic_store_deref_block_intel:
1799 assert(write_mask == 0);
1800
1801 switch (mode) {
1802 case nir_var_mem_ssbo:
1803 if (addr_format_is_global(addr_format, mode))
1804 op = nir_intrinsic_store_global_block_intel;
1805 else
1806 op = nir_intrinsic_store_ssbo_block_intel;
1807 break;
1808 case nir_var_mem_global:
1809 op = nir_intrinsic_store_global_block_intel;
1810 break;
1811 case nir_var_mem_shared:
1812 op = nir_intrinsic_store_shared_block_intel;
1813 break;
1814 default:
1815 unreachable("Unsupported explicit IO variable mode");
1816 }
1817 break;
1818
1819 default:
1820 unreachable("Invalid intrinsic");
1821 }
1822
1823 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
1824
1825 if (value->bit_size == 1) {
1826 /* For shared, we can go ahead and use NIR's and/or the back-end's
1827 * standard encoding for booleans rather than forcing a 0/1 boolean.
1828 * This should save an instruction or two.
1829 *
1830 * TODO: Make the native bool bit_size an option.
1831 */
1832 if (mode == nir_var_mem_shared ||
1833 mode == nir_var_shader_temp ||
1834 mode == nir_var_function_temp)
1835 value = nir_b2b32(b, value);
1836 else
1837 value = nir_b2iN(b, value, 32);
1838 }
1839
1840 store->src[0] = nir_src_for_ssa(value);
1841 if (addr_format_is_global(addr_format, mode)) {
1842 store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1843 } else if (addr_format_is_offset(addr_format, mode)) {
1844 assert(addr->num_components == 1);
1845 store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1846 } else {
1847 store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1848 store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1849 }
1850
1851 nir_intrinsic_set_write_mask(store, write_mask);
1852
1853 if (nir_intrinsic_has_access(store))
1854 nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1855
1856 nir_intrinsic_set_align(store, align_mul, align_offset);
1857
1858 assert(value->num_components == 1 ||
1859 value->num_components == intrin->num_components);
1860 store->num_components = value->num_components;
1861
1862 assert(value->bit_size % 8 == 0);
1863
1864 if (addr_format_needs_bounds_check(addr_format)) {
1865 /* TODO: Better handle block_intel. */
1866 assert(store->num_components == 1);
1867 const unsigned store_size = value->bit_size / 8;
1868 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1869
1870 nir_builder_instr_insert(b, &store->instr);
1871
1872 nir_pop_if(b, NULL);
1873 } else {
1874 nir_builder_instr_insert(b, &store->instr);
1875 }
1876 }
1877
1878 static nir_def *
1879 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1880 nir_def *addr, nir_address_format addr_format,
1881 nir_variable_mode modes)
1882 {
1883 modes = canonicalize_generic_modes(modes);
1884
1885 if (util_bitcount(modes) > 1) {
1886 if (addr_format_is_global(addr_format, modes)) {
1887 return build_explicit_io_atomic(b, intrin, addr, addr_format,
1888 nir_var_mem_global);
1889 } else if (modes & nir_var_function_temp) {
1890 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1891 nir_var_function_temp));
1892 nir_def *res1 =
1893 build_explicit_io_atomic(b, intrin, addr, addr_format,
1894 nir_var_function_temp);
1895 nir_push_else(b, NULL);
1896 nir_def *res2 =
1897 build_explicit_io_atomic(b, intrin, addr, addr_format,
1898 modes & ~nir_var_function_temp);
1899 nir_pop_if(b, NULL);
1900 return nir_if_phi(b, res1, res2);
1901 } else {
1902 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1903 nir_var_mem_shared));
1904 assert(modes & nir_var_mem_shared);
1905 nir_def *res1 =
1906 build_explicit_io_atomic(b, intrin, addr, addr_format,
1907 nir_var_mem_shared);
1908 nir_push_else(b, NULL);
1909 assert(modes & nir_var_mem_global);
1910 nir_def *res2 =
1911 build_explicit_io_atomic(b, intrin, addr, addr_format,
1912 nir_var_mem_global);
1913 nir_pop_if(b, NULL);
1914 return nir_if_phi(b, res1, res2);
1915 }
1916 }
1917
1918 assert(util_bitcount(modes) == 1);
1919 const nir_variable_mode mode = modes;
1920
1921 const unsigned num_data_srcs =
1922 nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1923
1924 nir_intrinsic_op op;
1925 switch (mode) {
1926 case nir_var_mem_ssbo:
1927 if (addr_format_is_global(addr_format, mode))
1928 op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1929 else
1930 op = ssbo_atomic_for_deref(intrin->intrinsic);
1931 break;
1932 case nir_var_mem_global:
1933 assert(addr_format_is_global(addr_format, mode));
1934 op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1935 break;
1936 case nir_var_mem_shared:
1937 assert(addr_format_is_offset(addr_format, mode));
1938 op = shared_atomic_for_deref(intrin->intrinsic);
1939 break;
1940 case nir_var_mem_task_payload:
1941 assert(addr_format_is_offset(addr_format, mode));
1942 op = task_payload_atomic_for_deref(intrin->intrinsic);
1943 break;
1944 default:
1945 unreachable("Unsupported explicit IO variable mode");
1946 }
1947
1948 nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1949 nir_intrinsic_set_atomic_op(atomic, nir_intrinsic_atomic_op(intrin));
1950
1951 unsigned src = 0;
1952 if (addr_format_is_global(addr_format, mode)) {
1953 atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1954 } else if (addr_format_is_offset(addr_format, mode)) {
1955 assert(addr->num_components == 1);
1956 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1957 } else {
1958 atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1959 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1960 }
1961 for (unsigned i = 0; i < num_data_srcs; i++) {
1962 atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1963 }
1964
1965 /* Global atomics don't have access flags because they assume that the
1966 * address may be non-uniform.
1967 */
1968 if (nir_intrinsic_has_access(atomic))
1969 nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1970
1971 assert(intrin->def.num_components == 1);
1972 nir_def_init(&atomic->instr, &atomic->def, 1,
1973 intrin->def.bit_size);
1974
1975 assert(atomic->def.bit_size % 8 == 0);
1976
1977 if (addr_format_needs_bounds_check(addr_format)) {
1978 const unsigned atomic_size = atomic->def.bit_size / 8;
1979 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1980
1981 nir_builder_instr_insert(b, &atomic->instr);
1982
1983 nir_pop_if(b, NULL);
1984 return nir_if_phi(b, &atomic->def,
1985 nir_undef(b, 1, atomic->def.bit_size));
1986 } else {
1987 nir_builder_instr_insert(b, &atomic->instr);
1988 return &atomic->def;
1989 }
1990 }
1991
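/* Compute the address of a deref in the given address format.  For
 * non-variable derefs, base_addr must be the (already lowered) address of
 * the parent deref; for variable derefs it is not used and may be NULL.
 */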
1992 nir_def *
1993 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1994 nir_def *base_addr,
1995 nir_address_format addr_format)
1996 {
1997 switch (deref->deref_type) {
1998 case nir_deref_type_var:
1999 return build_addr_for_var(b, deref->var, addr_format);
2000
2001 case nir_deref_type_ptr_as_array:
2002 case nir_deref_type_array: {
2003 unsigned stride = nir_deref_instr_array_stride(deref);
2004 assert(stride > 0);
2005
2006 unsigned offset_bit_size = addr_get_offset_bit_size(base_addr, addr_format);
2007 nir_def *index = deref->arr.index.ssa;
2008 nir_def *offset;
2009
2010 /* If the access chain has been declared in-bounds, then we know it doesn't
2011 * overflow the type. For nir_deref_type_array, this implies it cannot be
2012 * negative. Also, since types in NIR have a maximum 32-bit size, we know the
2013 * final result will fit in a 32-bit value so we can convert the index to
2014 * 32-bit before multiplying and save ourselves from a 64-bit multiply.
2015 */
2016 if (deref->arr.in_bounds && deref->deref_type == nir_deref_type_array) {
2017 index = nir_u2u32(b, index);
2018 offset = nir_u2uN(b, nir_amul_imm(b, index, stride), offset_bit_size);
2019 } else {
2020 index = nir_i2iN(b, index, offset_bit_size);
2021 offset = nir_amul_imm(b, index, stride);
2022 }
2023
2024 return nir_build_addr_iadd(b, base_addr, addr_format,
2025 deref->modes, offset);
2026 }
2027
2028 case nir_deref_type_array_wildcard:
2029 unreachable("Wildcards should be lowered by now");
2030 break;
2031
2032 case nir_deref_type_struct: {
2033 nir_deref_instr *parent = nir_deref_instr_parent(deref);
2034 int offset = glsl_get_struct_field_offset(parent->type,
2035 deref->strct.index);
2036 assert(offset >= 0);
2037 return nir_build_addr_iadd_imm(b, base_addr, addr_format,
2038 deref->modes, offset);
2039 }
2040
2041 case nir_deref_type_cast:
2042 /* Nothing to do here */
2043 return base_addr;
2044 }
2045
2046 unreachable("Invalid NIR deref type");
2047 }
2048
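/* Lower a single deref-based load/store/atomic intrinsic to its explicit,
 * address-based form, given the already computed address of its deref in
 * the given address format.  Vector accesses are scalarized where needed
 * for correct bounds checking (see the comment in the body below).
 */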
2049 void
2050 nir_lower_explicit_io_instr(nir_builder *b,
2051 nir_intrinsic_instr *intrin,
2052 nir_def *addr,
2053 nir_address_format addr_format)
2054 {
2055 b->cursor = nir_after_instr(&intrin->instr);
2056
2057 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2058 unsigned vec_stride = glsl_get_explicit_stride(deref->type);
2059 unsigned scalar_size = type_scalar_size_bytes(deref->type);
2060 if (vec_stride == 0) {
2061 vec_stride = scalar_size;
2062 } else {
2063 assert(glsl_type_is_vector(deref->type));
2064 assert(vec_stride >= scalar_size);
2065 }
2066
2067 uint32_t align_mul, align_offset;
2068 if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
2069 /* If we don't have an alignment from the deref, assume scalar */
2070 align_mul = scalar_size;
2071 align_offset = 0;
2072 }
2073
2074 /* In order for bounds checking to be correct as per the Vulkan spec,
2075 * we need to check at the individual component granularity. Prior to
2076 * robustness2, we're technically allowed to be sloppy by 16B. Even with
2077 * robustness2, UBO loads are allowed to have a granularity as high as 256B
2078 * depending on hardware limits. However, we have none of that information
2079 * here. Short of adding new address formats, the easiest way to do that
2080 * is to just split any loads and stores into individual components here.
2081 *
2082 * TODO: At some point in the future we may want to add more ops similar to
2083 * nir_intrinsic_load_global_constant_bounded and make bounds checking the
2084 * back-end's problem. Another option would be to somehow plumb more of
2085 * that information through to nir_lower_explicit_io. For now, however,
2086 * scalarizing is at least correct.
2087 */
2088 bool scalarize = vec_stride > scalar_size ||
2089 addr_format_needs_bounds_check(addr_format);
2090
2091 switch (intrin->intrinsic) {
2092 case nir_intrinsic_load_deref: {
2093 nir_def *value;
2094 if (scalarize) {
2095 nir_def *comps[NIR_MAX_VEC_COMPONENTS] = {
2096 NULL,
2097 };
2098 for (unsigned i = 0; i < intrin->num_components; i++) {
2099 unsigned comp_offset = i * vec_stride;
2100 nir_def *comp_addr = nir_build_addr_iadd_imm(b, addr, addr_format,
2101 deref->modes,
2102 comp_offset);
2103 comps[i] = build_explicit_io_load(b, intrin, comp_addr,
2104 addr_format, deref->modes,
2105 align_mul,
2106 (align_offset + comp_offset) %
2107 align_mul,
2108 1);
2109 }
2110 value = nir_vec(b, comps, intrin->num_components);
2111 } else {
2112 value = build_explicit_io_load(b, intrin, addr, addr_format,
2113 deref->modes, align_mul, align_offset,
2114 intrin->num_components);
2115 }
2116 nir_def_rewrite_uses(&intrin->def, value);
2117 break;
2118 }
2119
2120 case nir_intrinsic_store_deref: {
2121 nir_def *value = intrin->src[1].ssa;
2122 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
2123 if (scalarize) {
2124 for (unsigned i = 0; i < intrin->num_components; i++) {
2125 if (!(write_mask & (1 << i)))
2126 continue;
2127
2128 unsigned comp_offset = i * vec_stride;
2129 nir_def *comp_addr = nir_build_addr_iadd_imm(b, addr, addr_format,
2130 deref->modes,
2131 comp_offset);
2132 build_explicit_io_store(b, intrin, comp_addr, addr_format,
2133 deref->modes, align_mul,
2134 (align_offset + comp_offset) % align_mul,
2135 nir_channel(b, value, i), 1);
2136 }
2137 } else {
2138 build_explicit_io_store(b, intrin, addr, addr_format,
2139 deref->modes, align_mul, align_offset,
2140 value, write_mask);
2141 }
2142 break;
2143 }
2144
2145 case nir_intrinsic_load_deref_block_intel: {
2146 nir_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
2147 deref->modes,
2148 align_mul, align_offset,
2149 intrin->num_components);
2150 nir_def_rewrite_uses(&intrin->def, value);
2151 break;
2152 }
2153
2154 case nir_intrinsic_store_deref_block_intel: {
2155 nir_def *value = intrin->src[1].ssa;
2156 const nir_component_mask_t write_mask = 0;
2157 build_explicit_io_store(b, intrin, addr, addr_format,
2158 deref->modes, align_mul, align_offset,
2159 value, write_mask);
2160 break;
2161 }
2162
2163 default: {
2164 nir_def *value =
2165 build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
2166 nir_def_rewrite_uses(&intrin->def, value);
2167 break;
2168 }
2169 }
2170
2171 nir_instr_remove(&intrin->instr);
2172 }
2173
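/* Compute the alignment of a deref chain with explicit layout, expressed as
 * an (align_mul, align_offset) pair.  Returns false if no alignment
 * information can be derived.
 */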
2174 bool
2175 nir_get_explicit_deref_align(nir_deref_instr *deref,
2176 bool default_to_type_align,
2177 uint32_t *align_mul,
2178 uint32_t *align_offset)
2179 {
2180 if (deref->deref_type == nir_deref_type_var) {
2181 /* If we see a variable, align_mul is effectively infinite because we
2182 * know the offset exactly (up to the offset of the base pointer for the
2183 * given variable mode). We have to pick something, so we choose 256B
2184 * as an arbitrary alignment that seems high enough for any reasonable
2185 * wide-load use-case. Back-ends should clamp alignments down if 256B
2186 * is too large for some reason.
2187 */
2188 *align_mul = 256;
2189 *align_offset = deref->var->data.driver_location % 256;
2190 return true;
2191 }
2192
2193 /* If we're a cast deref that has an alignment, use that. */
2194 if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
2195 *align_mul = deref->cast.align_mul;
2196 *align_offset = deref->cast.align_offset;
2197 return true;
2198 }
2199
2200 /* Otherwise, we need to compute the alignment based on the parent */
2201 nir_deref_instr *parent = nir_deref_instr_parent(deref);
2202 if (parent == NULL) {
2203 assert(deref->deref_type == nir_deref_type_cast);
2204 if (default_to_type_align) {
2205 /* If we don't have a parent, assume the type's alignment, if any. */
2206 unsigned type_align = glsl_get_explicit_alignment(deref->type);
2207 if (type_align == 0)
2208 return false;
2209
2210 *align_mul = type_align;
2211 *align_offset = 0;
2212 return true;
2213 } else {
2214 return false;
2215 }
2216 }
2217
2218 uint32_t parent_mul, parent_offset;
2219 if (!nir_get_explicit_deref_align(parent, default_to_type_align,
2220 &parent_mul, &parent_offset))
2221 return false;
2222
2223 switch (deref->deref_type) {
2224 case nir_deref_type_var:
2225 unreachable("Handled above");
2226
2227 case nir_deref_type_array:
2228 case nir_deref_type_array_wildcard:
2229 case nir_deref_type_ptr_as_array: {
2230 const unsigned stride = nir_deref_instr_array_stride(deref);
2231 if (stride == 0)
2232 return false;
2233
2234 if (deref->deref_type != nir_deref_type_array_wildcard &&
2235 nir_src_is_const(deref->arr.index)) {
2236 unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
2237 *align_mul = parent_mul;
2238 *align_offset = (parent_offset + offset) % parent_mul;
2239 } else {
2240 /* If this is a wildcard or an indirect deref, we have to go with the
2241 * power-of-two gcd.
2242 */
2243 *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
2244 *align_offset = parent_offset % *align_mul;
2245 }
2246 return true;
2247 }
2248
2249 case nir_deref_type_struct: {
2250 const int offset = glsl_get_struct_field_offset(parent->type,
2251 deref->strct.index);
2252 if (offset < 0)
2253 return false;
2254
2255 *align_mul = parent_mul;
2256 *align_offset = (parent_offset + offset) % parent_mul;
2257 return true;
2258 }
2259
2260 case nir_deref_type_cast:
2261 /* We handled the explicit alignment case above. */
2262 assert(deref->cast.align_mul == 0);
2263 *align_mul = parent_mul;
2264 *align_offset = parent_offset;
2265 return true;
2266 }
2267
2268 unreachable("Invalid deref_instr_type");
2269 }
2270
2271 static void
2272 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
2273 nir_address_format addr_format)
2274 {
2275 /* Ignore samplers/textures, because they are handled by other passes like `nir_lower_samplers`.
2276 * Only skip the ones in uniform mode, though; skipping others would break GL bindless
2277 * texture handles stored in UBOs.
2278 */
2279 if (nir_deref_mode_is_in_set(deref, nir_var_uniform) &&
2280 (glsl_type_is_sampler(deref->type) ||
2281 glsl_type_is_texture(deref->type)))
2282 return;
2283
2284 /* Just delete the deref if it's not used. We can't use
2285 * nir_deref_instr_remove_if_unused here because it may remove more than
2286 * one deref which could break our list walking since we walk the list
2287 * backwards.
2288 */
2289 if (nir_def_is_unused(&deref->def)) {
2290 nir_instr_remove(&deref->instr);
2291 return;
2292 }
2293
2294 b->cursor = nir_after_instr(&deref->instr);
2295
2296 nir_def *base_addr = NULL;
2297 if (deref->deref_type != nir_deref_type_var) {
2298 base_addr = deref->parent.ssa;
2299 }
2300
2301 nir_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
2302 addr_format);
2303 assert(addr->bit_size == deref->def.bit_size);
2304 assert(addr->num_components == deref->def.num_components);
2305
2306 nir_instr_remove(&deref->instr);
2307 nir_def_rewrite_uses(&deref->def, addr);
2308 }
2309
2310 static void
2311 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
2312 nir_address_format addr_format)
2313 {
2314 nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
2315 }
2316
2317 static void
2318 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
2319 nir_address_format addr_format)
2320 {
2321 b->cursor = nir_after_instr(&intrin->instr);
2322
2323 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2324
2325 assert(glsl_type_is_array(deref->type));
2326 assert(glsl_get_length(deref->type) == 0);
2327 assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
2328 unsigned stride = glsl_get_explicit_stride(deref->type);
2329 assert(stride > 0);
2330
2331 nir_def *addr = &deref->def;
2332
2333 nir_def *offset, *size;
2334 switch (addr_format) {
2335 case nir_address_format_64bit_global_32bit_offset:
2336 case nir_address_format_64bit_bounded_global:
2337 offset = nir_channel(b, addr, 3);
2338 size = nir_channel(b, addr, 2);
2339 break;
2340
2341 case nir_address_format_32bit_index_offset:
2342 case nir_address_format_32bit_index_offset_pack64:
2343 case nir_address_format_vec2_index_32bit_offset: {
2344 offset = addr_to_offset(b, addr, addr_format);
2345 nir_def *index = addr_to_index(b, addr, addr_format);
2346 unsigned access = nir_intrinsic_access(intrin);
2347 size = nir_get_ssbo_size(b, index, .access = access);
2348 break;
2349 }
2350
2351 default:
2352 unreachable("Cannot determine SSBO size");
2353 }
2354
2355 nir_def *remaining = nir_usub_sat(b, size, offset);
2356 nir_def *arr_size = nir_udiv_imm(b, remaining, stride);
2357
2358 nir_def_replace(&intrin->def, arr_size);
2359 }
2360
2361 static void
2362 lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
2363 nir_address_format addr_format)
2364 {
2365 if (addr_format_is_global(addr_format, 0)) {
2366 /* If the address format is always global, then the driver can use
2367 * global addresses regardless of the mode. In that case, don't create
2368 * a check, just whack the intrinsic to addr_mode_is and delegate to the
2369 * driver lowering.
2370 */
2371 intrin->intrinsic = nir_intrinsic_addr_mode_is;
2372 return;
2373 }
2374
2375 nir_def *addr = intrin->src[0].ssa;
2376
2377 b->cursor = nir_instr_remove(&intrin->instr);
2378
2379 nir_def *is_mode =
2380 build_runtime_addr_mode_check(b, addr, addr_format,
2381 nir_intrinsic_memory_modes(intrin));
2382
2383 nir_def_rewrite_uses(&intrin->def, is_mode);
2384 }
2385
2386 static bool
2387 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
2388 nir_address_format addr_format)
2389 {
2390 bool progress = false;
2391
2392 nir_builder b = nir_builder_create(impl);
2393
2394 /* Walk in reverse order so that we can see the full deref chain when we
2395 * lower the access operations. We lower them assuming that the derefs
2396 * will be turned into address calculations later.
2397 */
2398 nir_foreach_block_reverse(block, impl) {
2399 nir_foreach_instr_reverse_safe(instr, block) {
2400 switch (instr->type) {
2401 case nir_instr_type_deref: {
2402 nir_deref_instr *deref = nir_instr_as_deref(instr);
2403 if (nir_deref_mode_is_in_set(deref, modes)) {
2404 lower_explicit_io_deref(&b, deref, addr_format);
2405 progress = true;
2406 }
2407 break;
2408 }
2409
2410 case nir_instr_type_intrinsic: {
2411 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2412 switch (intrin->intrinsic) {
2413 case nir_intrinsic_load_deref:
2414 case nir_intrinsic_store_deref:
2415 case nir_intrinsic_load_deref_block_intel:
2416 case nir_intrinsic_store_deref_block_intel:
2417 case nir_intrinsic_deref_atomic:
2418 case nir_intrinsic_deref_atomic_swap: {
2419 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2420 if (nir_deref_mode_is_in_set(deref, modes)) {
2421 lower_explicit_io_access(&b, intrin, addr_format);
2422 progress = true;
2423 }
2424 break;
2425 }
2426
2427 case nir_intrinsic_deref_buffer_array_length: {
2428 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2429 if (nir_deref_mode_is_in_set(deref, modes)) {
2430 lower_explicit_io_array_length(&b, intrin, addr_format);
2431 progress = true;
2432 }
2433 break;
2434 }
2435
2436 case nir_intrinsic_deref_mode_is: {
2437 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2438 if (nir_deref_mode_is_in_set(deref, modes)) {
2439 lower_explicit_io_mode_check(&b, intrin, addr_format);
2440 progress = true;
2441 }
2442 break;
2443 }
2444
2445 case nir_intrinsic_launch_mesh_workgroups_with_payload_deref: {
2446 if (modes & nir_var_mem_task_payload) {
2447 /* Get address and size of the payload variable. */
2448 nir_deref_instr *deref = nir_src_as_deref(intrin->src[1]);
2449 assert(deref->deref_type == nir_deref_type_var);
2450 unsigned base = deref->var->data.explicit_location;
2451 unsigned size = glsl_get_explicit_size(deref->var->type, false);
2452
2453 /* Replace the current instruction with the explicit intrinsic. */
2454 nir_def *dispatch_3d = intrin->src[0].ssa;
2455 b.cursor = nir_instr_remove(instr);
2456 nir_launch_mesh_workgroups(&b, dispatch_3d, .base = base, .range = size);
2457 progress = true;
2458 }
2459
2460 break;
2461 }
2462
2463 default:
2464 break;
2465 }
2466 break;
2467 }
2468
2469 default:
2470 /* Nothing to do */
2471 break;
2472 }
2473 }
2474 }
2475
2476 if (progress) {
2477 nir_metadata_preserve(impl, nir_metadata_none);
2478 } else {
2479 nir_metadata_preserve(impl, nir_metadata_all);
2480 }
2481
2482 return progress;
2483 }
2484
2485 /** Lower explicitly laid out I/O access to byte offset/address intrinsics
2486 *
2487 * This pass is intended to be used for any I/O which touches memory external
2488 * to the shader or which is directly visible to the client. It requires that
2489 * all data types in the given modes have explicit stride/offset decorations
2490 * to tell it exactly how to calculate the offset/address for the given load,
2491 * store, or atomic operation. If the offset/stride information does not come
2492 * from the client explicitly (as with shared variables in GL or Vulkan),
2493 * nir_lower_vars_to_explicit_types() can be used to add them.
2494 *
2495 * Unlike nir_lower_io, this pass is fully capable of handling incomplete
2496 * pointer chains which may contain cast derefs. It does so by walking the
2497 * deref chain backwards and simply replacing each deref, one at a time, with
2498 * the appropriate address calculation. The pass takes a nir_address_format
2499 * parameter which describes how the offset or address is to be represented
2500 * during calculations. By ensuring that the address is always in a
2501 * consistent format, pointers can safely be conjured from thin air by the
2502 * driver, stored to variables, passed through phis, etc.
2503 *
2504 * The one exception to the simple algorithm described above is the handling
2505 * of row-major matrices, in which case we may look one additional level down
2506 * the deref chain.
2507 *
2508 * This pass is also capable of handling OpenCL generic pointers. If the
2509 * address mode is global, it will lower any ambiguous (more than one mode)
2510 * access to global and pass through the deref_mode_is run-time checks as
2511 * addr_mode_is. This assumes the driver has somehow mapped shared and
2512 * scratch memory to the global address space. For other modes such as
2513 * 62bit_generic, there is an enum embedded in the address and we lower
2514 * ambiguous access to an if-ladder and deref_mode_is to a check against the
2515 * embedded enum. If nir_lower_explicit_io is called on any shader that
2516 * contains generic pointers, it must either be used on all of the generic
2517 * modes or none.
2518 */
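/* A typical invocation might look like the following (illustrative only; the
 * set of modes and the address format are entirely driver-specific and not
 * prescribed by this pass):
 *
 *    nir_lower_explicit_io(nir, nir_var_mem_ubo | nir_var_mem_ssbo,
 *                          nir_address_format_64bit_bounded_global);
 */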
2519 bool
2520 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
2521 nir_address_format addr_format)
2522 {
2523 bool progress = false;
2524
2525 nir_foreach_function_impl(impl, shader) {
2526 if (impl && nir_lower_explicit_io_impl(impl, modes, addr_format))
2527 progress = true;
2528 }
2529
2530 return progress;
2531 }
2532
2533 static bool
2534 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
2535 nir_variable_mode modes,
2536 glsl_type_size_align_func type_info)
2537 {
2538 bool progress = false;
2539
2540 nir_foreach_block(block, impl) {
2541 nir_foreach_instr(instr, block) {
2542 if (instr->type != nir_instr_type_deref)
2543 continue;
2544
2545 nir_deref_instr *deref = nir_instr_as_deref(instr);
2546 if (!nir_deref_mode_is_in_set(deref, modes))
2547 continue;
2548
2549 unsigned size, alignment;
2550 const struct glsl_type *new_type =
2551 glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
2552 if (new_type != deref->type) {
2553 progress = true;
2554 deref->type = new_type;
2555 }
2556 if (deref->deref_type == nir_deref_type_cast) {
2557 /* See also glsl_type::get_explicit_type_for_size_align() */
2558 unsigned new_stride = align(size, alignment);
2559 if (new_stride != deref->cast.ptr_stride) {
2560 deref->cast.ptr_stride = new_stride;
2561 progress = true;
2562 }
2563 }
2564 }
2565 }
2566
2567 if (progress) {
2568 nir_metadata_preserve(impl, nir_metadata_control_flow |
2569 nir_metadata_live_defs |
2570 nir_metadata_loop_analysis);
2571 } else {
2572 nir_metadata_preserve(impl, nir_metadata_all);
2573 }
2574
2575 return progress;
2576 }
2577
2578 static bool
2579 lower_vars_to_explicit(nir_shader *shader,
2580 struct exec_list *vars, nir_variable_mode mode,
2581 glsl_type_size_align_func type_info)
2582 {
2583 bool progress = false;
2584 unsigned offset;
2585 switch (mode) {
2586 case nir_var_uniform:
2587 assert(shader->info.stage == MESA_SHADER_KERNEL);
2588 offset = 0;
2589 break;
2590 case nir_var_function_temp:
2591 case nir_var_shader_temp:
2592 offset = shader->scratch_size;
2593 break;
2594 case nir_var_mem_shared:
2595 offset = shader->info.shared_size;
2596 break;
2597 case nir_var_mem_task_payload:
2598 offset = shader->info.task_payload_size;
2599 break;
2600 case nir_var_mem_node_payload:
2601 assert(!shader->info.cs.node_payloads_size);
2602 offset = 0;
2603 break;
2604 case nir_var_mem_global:
2605 offset = shader->global_mem_size;
2606 break;
2607 case nir_var_mem_constant:
2608 offset = shader->constant_data_size;
2609 break;
2610 case nir_var_shader_call_data:
2611 case nir_var_ray_hit_attrib:
2612 case nir_var_mem_node_payload_in:
2613 offset = 0;
2614 break;
2615 default:
2616 unreachable("Unsupported mode");
2617 }
2618 nir_foreach_variable_in_list(var, vars) {
2619 if (var->data.mode != mode)
2620 continue;
2621
2622 unsigned size, alignment;
2623 const struct glsl_type *explicit_type =
2624 glsl_get_explicit_type_for_size_align(var->type, type_info,
2625 &size, &alignment);
2626
2627 if (explicit_type != var->type)
2628 var->type = explicit_type;
2629
2630 UNUSED bool is_empty_struct =
2631 glsl_type_is_struct_or_ifc(explicit_type) &&
2632 glsl_get_length(explicit_type) == 0;
2633
2634 assert(util_is_power_of_two_nonzero(alignment) || is_empty_struct ||
2635 glsl_type_is_cmat(glsl_without_array(explicit_type)));
2636 assert(util_is_power_of_two_or_zero(var->data.alignment));
2637 alignment = MAX2(alignment, var->data.alignment);
2638
2639 var->data.driver_location = ALIGN_POT(offset, alignment);
2640 offset = var->data.driver_location + size;
2641 progress = true;
2642 }
2643
2644 switch (mode) {
2645 case nir_var_uniform:
2646 assert(shader->info.stage == MESA_SHADER_KERNEL);
2647 shader->num_uniforms = offset;
2648 break;
2649 case nir_var_shader_temp:
2650 case nir_var_function_temp:
2651 shader->scratch_size = offset;
2652 break;
2653 case nir_var_mem_shared:
2654 shader->info.shared_size = offset;
2655 break;
2656 case nir_var_mem_task_payload:
2657 shader->info.task_payload_size = offset;
2658 break;
2659 case nir_var_mem_node_payload:
2660 shader->info.cs.node_payloads_size = offset;
2661 break;
2662 case nir_var_mem_global:
2663 shader->global_mem_size = offset;
2664 break;
2665 case nir_var_mem_constant:
2666 shader->constant_data_size = offset;
2667 break;
2668 case nir_var_shader_call_data:
2669 case nir_var_ray_hit_attrib:
2670 case nir_var_mem_node_payload_in:
2671 break;
2672 default:
2673 unreachable("Unsupported mode");
2674 }
2675
2676 return progress;
2677 }
2678
2679 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
2680 * generic pointers, it must either be used on all of the generic modes or
2681 * none.
2682 */
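/* For example, shared memory in a compute shader could be laid out with the
 * following call (illustrative only; the size/align callback is chosen by
 * the driver, glsl_get_natural_size_align_bytes being one common choice):
 *
 *    nir_lower_vars_to_explicit_types(nir, nir_var_mem_shared,
 *                                     glsl_get_natural_size_align_bytes);
 */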
2683 bool
2684 nir_lower_vars_to_explicit_types(nir_shader *shader,
2685 nir_variable_mode modes,
2686 glsl_type_size_align_func type_info)
2687 {
2688 /* TODO: Situations which need to be handled to support more modes:
2689 * - row-major matrices
2690 * - compact shader inputs/outputs
2691 * - interface types
2692 */
2693 ASSERTED nir_variable_mode supported =
2694 nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
2695 nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
2696 nir_var_shader_call_data | nir_var_ray_hit_attrib |
2697 nir_var_mem_task_payload | nir_var_mem_node_payload |
2698 nir_var_mem_node_payload_in;
2699 assert(!(modes & ~supported) && "unsupported");
2700
2701 bool progress = false;
2702
2703 if (modes & nir_var_uniform)
2704 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
2705 if (modes & nir_var_mem_global)
2706 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);
2707
2708 if (modes & nir_var_mem_shared) {
2709 assert(!shader->info.shared_memory_explicit_layout);
2710 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
2711 }
2712
2713 if (modes & nir_var_shader_temp)
2714 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
2715 if (modes & nir_var_mem_constant)
2716 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
2717 if (modes & nir_var_shader_call_data)
2718 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
2719 if (modes & nir_var_ray_hit_attrib)
2720 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);
2721 if (modes & nir_var_mem_task_payload)
2722 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_task_payload, type_info);
2723 if (modes & nir_var_mem_node_payload)
2724 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload, type_info);
2725 if (modes & nir_var_mem_node_payload_in)
2726 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload_in, type_info);
2727
2728 nir_foreach_function_impl(impl, shader) {
2729 if (modes & nir_var_function_temp)
2730 progress |= lower_vars_to_explicit(shader, &impl->locals, nir_var_function_temp, type_info);
2731
2732 progress |= nir_lower_vars_to_explicit_types_impl(impl, modes, type_info);
2733 }
2734
2735 return progress;
2736 }
2737
2738 static void
2739 write_constant(void *dst, size_t dst_size,
2740 const nir_constant *c, const struct glsl_type *type)
2741 {
2742 if (c->is_null_constant) {
2743 memset(dst, 0, dst_size);
2744 return;
2745 }
2746
2747 if (glsl_type_is_vector_or_scalar(type)) {
2748 const unsigned num_components = glsl_get_vector_elements(type);
2749 const unsigned bit_size = glsl_get_bit_size(type);
2750 if (bit_size == 1) {
2751 /* Booleans are special-cased to be 32-bit
2752 *
2753 * TODO: Make the native bool bit_size an option.
2754 */
2755 assert(num_components * 4 <= dst_size);
2756 for (unsigned i = 0; i < num_components; i++) {
2757 int32_t b32 = -(int)c->values[i].b;
2758 memcpy((char *)dst + i * 4, &b32, 4);
2759 }
2760 } else {
2761 assert(bit_size >= 8 && bit_size % 8 == 0);
2762 const unsigned byte_size = bit_size / 8;
2763 assert(num_components * byte_size <= dst_size);
2764 for (unsigned i = 0; i < num_components; i++) {
2765 /* Annoyingly, thanks to packed structs, we can't make any
2766 * assumptions about the alignment of dst. To avoid any strange
2767 * issues with unaligned writes, we always use memcpy.
2768 */
2769 memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
2770 }
2771 }
2772 } else if (glsl_type_is_array_or_matrix(type)) {
2773 const unsigned array_len = glsl_get_length(type);
2774 const unsigned stride = glsl_get_explicit_stride(type);
2775 assert(stride > 0);
2776 const struct glsl_type *elem_type = glsl_get_array_element(type);
2777 for (unsigned i = 0; i < array_len; i++) {
2778 unsigned elem_offset = i * stride;
2779 assert(elem_offset < dst_size);
2780 write_constant((char *)dst + elem_offset, dst_size - elem_offset,
2781 c->elements[i], elem_type);
2782 }
2783 } else {
2784 assert(glsl_type_is_struct_or_ifc(type));
2785 const unsigned num_fields = glsl_get_length(type);
2786 for (unsigned i = 0; i < num_fields; i++) {
2787 const int field_offset = glsl_get_struct_field_offset(type, i);
2788 assert(field_offset >= 0 && field_offset < dst_size);
2789 const struct glsl_type *field_type = glsl_get_struct_field(type, i);
2790 write_constant((char *)dst + field_offset, dst_size - field_offset,
2791 c->elements[i], field_type);
2792 }
2793 }
2794 }
2795
2796 void
2797 nir_gather_explicit_io_initializers(nir_shader *shader,
2798 void *dst, size_t dst_size,
2799 nir_variable_mode mode)
2800 {
2801 /* It doesn't really make sense to gather initializers for more than one
2802 * mode at a time. If this ever becomes well-defined, we can drop the
2803 * assert then.
2804 */
2805 assert(util_bitcount(mode) == 1);
2806
2807 nir_foreach_variable_with_modes(var, shader, mode) {
2808 assert(var->data.driver_location < dst_size);
2809 write_constant((char *)dst + var->data.driver_location,
2810 dst_size - var->data.driver_location,
2811 var->constant_initializer, var->type);
2812 }
2813 }
2814
2815 /**
2816 * Return the offset source number for a load/store intrinsic or -1 if there's no offset.
2817 */
2818 int
2819 nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
2820 {
2821 switch (instr->intrinsic) {
2822 case nir_intrinsic_load_input:
2823 case nir_intrinsic_load_per_primitive_input:
2824 case nir_intrinsic_load_output:
2825 case nir_intrinsic_load_shared:
2826 case nir_intrinsic_load_task_payload:
2827 case nir_intrinsic_load_uniform:
2828 case nir_intrinsic_load_constant:
2829 case nir_intrinsic_load_push_constant:
2830 case nir_intrinsic_load_kernel_input:
2831 case nir_intrinsic_load_global:
2832 case nir_intrinsic_load_global_2x32:
2833 case nir_intrinsic_load_global_constant:
2834 case nir_intrinsic_load_global_etna:
2835 case nir_intrinsic_load_scratch:
2836 case nir_intrinsic_load_fs_input_interp_deltas:
2837 case nir_intrinsic_shared_atomic:
2838 case nir_intrinsic_shared_atomic_swap:
2839 case nir_intrinsic_task_payload_atomic:
2840 case nir_intrinsic_task_payload_atomic_swap:
2841 case nir_intrinsic_global_atomic:
2842 case nir_intrinsic_global_atomic_2x32:
2843 case nir_intrinsic_global_atomic_swap:
2844 case nir_intrinsic_global_atomic_swap_2x32:
2845 case nir_intrinsic_load_coefficients_agx:
2846 return 0;
2847 case nir_intrinsic_load_ubo:
2848 case nir_intrinsic_load_ssbo:
2849 case nir_intrinsic_load_input_vertex:
2850 case nir_intrinsic_load_per_vertex_input:
2851 case nir_intrinsic_load_per_vertex_output:
2852 case nir_intrinsic_load_per_view_output:
2853 case nir_intrinsic_load_per_primitive_output:
2854 case nir_intrinsic_load_interpolated_input:
2855 case nir_intrinsic_load_smem_amd:
2856 case nir_intrinsic_store_output:
2857 case nir_intrinsic_store_shared:
2858 case nir_intrinsic_store_task_payload:
2859 case nir_intrinsic_store_global:
2860 case nir_intrinsic_store_global_2x32:
2861 case nir_intrinsic_store_global_etna:
2862 case nir_intrinsic_store_scratch:
2863 case nir_intrinsic_ssbo_atomic:
2864 case nir_intrinsic_ssbo_atomic_swap:
2865 case nir_intrinsic_ldc_nv:
2866 case nir_intrinsic_ldcx_nv:
2867 return 1;
2868 case nir_intrinsic_store_ssbo:
2869 case nir_intrinsic_store_per_vertex_output:
2870 case nir_intrinsic_store_per_view_output:
2871 case nir_intrinsic_store_per_primitive_output:
2872 case nir_intrinsic_load_attribute_pan:
2873 return 2;
2874 default:
2875 return -1;
2876 }
2877 }
2878
2879 /**
2880 * Return the offset source for a load/store intrinsic.
2881 */
2882 nir_src *
2883 nir_get_io_offset_src(nir_intrinsic_instr *instr)
2884 {
2885 const int idx = nir_get_io_offset_src_number(instr);
2886 return idx >= 0 ? &instr->src[idx] : NULL;
2887 }
2888
2889 /**
2890 * Return the array index source number for an arrayed load/store intrinsic or -1 if there's no array index.
2891 */
2892 int
2893 nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr)
2894 {
2895 switch (instr->intrinsic) {
2896 case nir_intrinsic_load_per_vertex_input:
2897 case nir_intrinsic_load_per_vertex_output:
2898 case nir_intrinsic_load_per_view_output:
2899 case nir_intrinsic_load_per_primitive_output:
2900 return 0;
2901 case nir_intrinsic_store_per_vertex_output:
2902 case nir_intrinsic_store_per_view_output:
2903 case nir_intrinsic_store_per_primitive_output:
2904 return 1;
2905 default:
2906 return -1;
2907 }
2908 }
2909
2910 /**
2911 * Return the array index source for an arrayed load/store intrinsic.
2912 */
2913 nir_src *
2914 nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr)
2915 {
2916 const int idx = nir_get_io_arrayed_index_src_number(instr);
2917 return idx >= 0 ? &instr->src[idx] : NULL;
2918 }
2919
2920 /**
2921 * Return the numeric constant that identifies a NULL pointer for the given
2922 * address format.
2923 */
2924 const nir_const_value *
2925 nir_address_format_null_value(nir_address_format addr_format)
2926 {
2927 const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
2928 [nir_address_format_32bit_global] = { { 0 } },
2929 [nir_address_format_2x32bit_global] = { { 0 } },
2930 [nir_address_format_64bit_global] = { { 0 } },
2931 [nir_address_format_64bit_global_32bit_offset] = { { 0 } },
2932 [nir_address_format_64bit_bounded_global] = { { 0 } },
2933 [nir_address_format_32bit_index_offset] = { { .u32 = ~0 }, { .u32 = ~0 } },
2934 [nir_address_format_32bit_index_offset_pack64] = { { .u64 = ~0ull } },
2935 [nir_address_format_vec2_index_32bit_offset] = { { .u32 = ~0 }, { .u32 = ~0 }, { .u32 = ~0 } },
2936 [nir_address_format_32bit_offset] = { { .u32 = ~0 } },
2937 [nir_address_format_32bit_offset_as_64bit] = { { .u64 = ~0ull } },
2938 [nir_address_format_62bit_generic] = { { .u64 = 0 } },
2939 [nir_address_format_logical] = { { .u32 = ~0 } },
2940 };
2941
2942 assert(addr_format < ARRAY_SIZE(null_values));
2943 return null_values[addr_format];
2944 }
2945
2946 nir_def *
2947 nir_build_addr_ieq(nir_builder *b, nir_def *addr0, nir_def *addr1,
2948 nir_address_format addr_format)
2949 {
2950 switch (addr_format) {
2951 case nir_address_format_32bit_global:
2952 case nir_address_format_2x32bit_global:
2953 case nir_address_format_64bit_global:
2954 case nir_address_format_64bit_bounded_global:
2955 case nir_address_format_32bit_index_offset:
2956 case nir_address_format_vec2_index_32bit_offset:
2957 case nir_address_format_32bit_offset:
2958 case nir_address_format_62bit_generic:
2959 return nir_ball_iequal(b, addr0, addr1);
2960
2961 case nir_address_format_64bit_global_32bit_offset:
2962 return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
2963 nir_channels(b, addr1, 0xb));
2964
2965 case nir_address_format_32bit_offset_as_64bit:
2966 assert(addr0->num_components == 1 && addr1->num_components == 1);
2967 return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
2968
2969 case nir_address_format_32bit_index_offset_pack64:
2970 assert(addr0->num_components == 1 && addr1->num_components == 1);
2971 return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
2972
2973 case nir_address_format_logical:
2974 unreachable("Unsupported address format");
2975 }
2976
2977 unreachable("Invalid address format");
2978 }
2979
2980 nir_def *
2981 nir_build_addr_isub(nir_builder *b, nir_def *addr0, nir_def *addr1,
2982 nir_address_format addr_format)
2983 {
2984 switch (addr_format) {
2985 case nir_address_format_32bit_global:
2986 case nir_address_format_64bit_global:
2987 case nir_address_format_32bit_offset:
2988 case nir_address_format_32bit_index_offset_pack64:
2989 case nir_address_format_62bit_generic:
2990 assert(addr0->num_components == 1);
2991 assert(addr1->num_components == 1);
2992 return nir_isub(b, addr0, addr1);
2993
2994 case nir_address_format_2x32bit_global:
2995 return nir_isub(b, addr_to_global(b, addr0, addr_format),
2996 addr_to_global(b, addr1, addr_format));
2997
2998 case nir_address_format_32bit_offset_as_64bit:
2999 assert(addr0->num_components == 1);
3000 assert(addr1->num_components == 1);
3001 return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
3002
3003 case nir_address_format_64bit_global_32bit_offset:
3004 case nir_address_format_64bit_bounded_global:
3005 return nir_isub(b, addr_to_global(b, addr0, addr_format),
3006 addr_to_global(b, addr1, addr_format));
3007
3008 case nir_address_format_32bit_index_offset:
3009 assert(addr0->num_components == 2);
3010 assert(addr1->num_components == 2);
3011 /* Assume the same buffer index. */
3012 return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
3013
3014 case nir_address_format_vec2_index_32bit_offset:
3015 assert(addr0->num_components == 3);
3016 assert(addr1->num_components == 3);
3017 /* Assume the same buffer index. */
3018 return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
3019
3020 case nir_address_format_logical:
3021 unreachable("Unsupported address format");
3022 }
3023
3024 unreachable("Invalid address format");
3025 }
3026
3027 static bool
3028 is_input(nir_intrinsic_instr *intrin)
3029 {
3030 return intrin->intrinsic == nir_intrinsic_load_input ||
3031 intrin->intrinsic == nir_intrinsic_load_per_primitive_input ||
3032 intrin->intrinsic == nir_intrinsic_load_input_vertex ||
3033 intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
3034 intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
3035 intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
3036 }
3037
3038 static bool
3039 is_output(nir_intrinsic_instr *intrin)
3040 {
3041 return intrin->intrinsic == nir_intrinsic_load_output ||
3042 intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
3043 intrin->intrinsic == nir_intrinsic_load_per_view_output ||
3044 intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
3045 intrin->intrinsic == nir_intrinsic_store_output ||
3046 intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
3047 intrin->intrinsic == nir_intrinsic_store_per_view_output ||
3048 intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
3049 }
3050
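/* A 64-bit value with 3 or 4 components does not fit in a single vec4-sized
 * varying slot and therefore occupies two consecutive slots.
 */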
3051 static bool
3052 is_dual_slot(nir_intrinsic_instr *intrin)
3053 {
3054 if (intrin->intrinsic == nir_intrinsic_store_output ||
3055 intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
3056 intrin->intrinsic == nir_intrinsic_store_per_view_output ||
3057 intrin->intrinsic == nir_intrinsic_store_per_primitive_output) {
3058 return nir_src_bit_size(intrin->src[0]) == 64 &&
3059 nir_src_num_components(intrin->src[0]) >= 3;
3060 }
3061
3062 return intrin->def.bit_size == 64 &&
3063 intrin->def.num_components >= 3;
3064 }
3065
3066 /**
3067 * This pass adds constant offsets to instr->const_index[0] for input/output
3068 * intrinsics, and resets the offset source to 0. Non-constant offsets remain
3069 * unchanged - since we don't know what part of a compound variable is
3070 * accessed, we allocate storage for the entire thing. For drivers that use
3071 * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
3072 * the offset source will be 0, so that they don't have to add it in manually.
3073 */
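/* For example (illustrative): a load_input with base=4 and a constant offset
 * of 2 becomes a load_input with base=6 and offset 0, with
 * io_semantics.location bumped by the same two slots.
 */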
3074
3075 static bool
3076 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
3077 nir_variable_mode modes)
3078 {
3079 bool progress = false;
3080 nir_foreach_instr_safe(instr, block) {
3081 if (instr->type != nir_instr_type_intrinsic)
3082 continue;
3083
3084 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3085
3086 if (((modes & nir_var_shader_in) && is_input(intrin)) ||
3087 ((modes & nir_var_shader_out) && is_output(intrin))) {
3088 nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
3089
3090 /* NV_mesh_shader: ignore MS primitive indices. */
3091 if (b->shader->info.stage == MESA_SHADER_MESH &&
3092 sem.location == VARYING_SLOT_PRIMITIVE_INDICES &&
3093 !(b->shader->info.per_primitive_outputs &
3094 BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES)))
3095 continue;
3096
3097 nir_src *offset = nir_get_io_offset_src(intrin);
3098
3099 /* TODO: Better handling of per-view variables here */
3100 if (nir_src_is_const(*offset) &&
3101 !nir_intrinsic_io_semantics(intrin).per_view) {
3102 unsigned off = nir_src_as_uint(*offset);
3103
3104 if (off) {
3105 nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
3106
3107 sem.location += off;
3108 b->cursor = nir_before_instr(&intrin->instr);
3109 nir_src_rewrite(offset, nir_imm_int(b, 0));
3110 progress = true;
3111 }
3112 /* non-indirect indexing should reduce num_slots */
3113 sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
3114 nir_intrinsic_set_io_semantics(intrin, sem);
3115 }
3116 }
3117 }
3118
3119 return progress;
3120 }
3121
3122 bool
3123 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
3124 {
3125 bool progress = false;
3126
3127 nir_foreach_function_impl(impl, nir) {
3128 bool impl_progress = false;
3129 nir_builder b = nir_builder_create(impl);
3130 nir_foreach_block(block, impl) {
3131 impl_progress |= add_const_offset_to_base_block(block, &b, modes);
3132 }
3133 progress |= impl_progress;
3134 if (impl_progress)
3135 nir_metadata_preserve(impl, nir_metadata_control_flow);
3136 else
3137 nir_metadata_preserve(impl, nir_metadata_all);
3138 }
3139
3140 return progress;
3141 }
3142
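/* Replace load_input/load_interpolated_input intrinsics that load COL0/COL1
 * in a fragment shader with load_color0/load_color1, and record the
 * interpolation qualifiers (interp mode, sample, centroid) in shader_info.
 */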
3143 bool
3144 nir_lower_color_inputs(nir_shader *nir)
3145 {
3146 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3147 bool progress = false;
3148
3149 nir_builder b = nir_builder_create(impl);
3150
3151 nir_foreach_block(block, impl) {
3152 nir_foreach_instr_safe(instr, block) {
3153 if (instr->type != nir_instr_type_intrinsic)
3154 continue;
3155
3156 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3157
3158 if (intrin->intrinsic != nir_intrinsic_load_input &&
3159 intrin->intrinsic != nir_intrinsic_load_interpolated_input)
3160 continue;
3161
3162 nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
3163
3164 if (sem.location != VARYING_SLOT_COL0 &&
3165 sem.location != VARYING_SLOT_COL1)
3166 continue;
3167
3168 /* Default to FLAT (for load_input) */
3169 enum glsl_interp_mode interp = INTERP_MODE_FLAT;
3170 bool sample = false;
3171 bool centroid = false;
3172
3173 if (intrin->intrinsic == nir_intrinsic_load_interpolated_input) {
3174 nir_intrinsic_instr *baryc =
3175 nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr);
3176
3177 centroid =
3178 baryc->intrinsic == nir_intrinsic_load_barycentric_centroid;
3179 sample =
3180 baryc->intrinsic == nir_intrinsic_load_barycentric_sample;
3181 assert(centroid || sample ||
3182 baryc->intrinsic == nir_intrinsic_load_barycentric_pixel);
3183
3184 interp = nir_intrinsic_interp_mode(baryc);
3185 }
3186
3187 b.cursor = nir_before_instr(instr);
3188 nir_def *load = NULL;
3189
3190 if (sem.location == VARYING_SLOT_COL0) {
3191 load = nir_load_color0(&b);
3192 nir->info.fs.color0_interp = interp;
3193 nir->info.fs.color0_sample = sample;
3194 nir->info.fs.color0_centroid = centroid;
3195 } else {
3196 assert(sem.location == VARYING_SLOT_COL1);
3197 load = nir_load_color1(&b);
3198 nir->info.fs.color1_interp = interp;
3199 nir->info.fs.color1_sample = sample;
3200 nir->info.fs.color1_centroid = centroid;
3201 }
3202
3203 if (intrin->num_components != 4) {
3204 unsigned start = nir_intrinsic_component(intrin);
3205 unsigned count = intrin->num_components;
3206 load = nir_channels(&b, load, BITFIELD_RANGE(start, count));
3207 }
3208
3209 nir_def_replace(&intrin->def, load);
3210 progress = true;
3211 }
3212 }
3213
3214 if (progress) {
3215 nir_metadata_preserve(impl, nir_metadata_control_flow);
3216 } else {
3217 nir_metadata_preserve(impl, nir_metadata_all);
3218 }
3219 return progress;
3220 }
3221
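/* Propagate transform feedback information from nir->xfb_info into the
 * io_xfb/io_xfb2 indices of output store intrinsics, so that drivers
 * consuming lowered IO can get XFB info directly from the intrinsics.
 */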
3222 bool
3223 nir_io_add_intrinsic_xfb_info(nir_shader *nir)
3224 {
3225 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3226 bool progress = false;
3227
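/* shader_info stores XFB strides in dwords, while nir_xfb_info stores them
 * in bytes, hence the division by 4.
 */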
3228 for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++)
3229 nir->info.xfb_stride[i] = nir->xfb_info->buffers[i].stride / 4;
3230
3231 nir_foreach_block(block, impl) {
3232 nir_foreach_instr_safe(instr, block) {
3233 if (instr->type != nir_instr_type_intrinsic)
3234 continue;
3235
3236 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3237
3238 if (!nir_intrinsic_has_io_xfb(intr))
3239 continue;
3240
3241 /* No indirect indexing allowed; the offset source must be the constant 0. */
3242 ASSERTED nir_src offset = *nir_get_io_offset_src(intr);
3243 assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
3244
3245 /* Running this pass a second time should be a no-op: skip intrinsics that already have xfb info set. */
3246 if (nir_intrinsic_io_xfb(intr).out[0].num_components ||
3247 nir_intrinsic_io_xfb(intr).out[1].num_components ||
3248 nir_intrinsic_io_xfb2(intr).out[0].num_components ||
3249 nir_intrinsic_io_xfb2(intr).out[1].num_components)
3250 continue;
3251
3252 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
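/* Shift the write mask into vec4 component space so that it lines up with
 * out->component_mask below.
 */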
3253 unsigned writemask = nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr);
3254
3255 nir_io_xfb xfb[2];
3256 memset(xfb, 0, sizeof(xfb));
3257
3258 for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
3259 nir_xfb_output_info *out = &nir->xfb_info->outputs[i];
3260 if (out->location == sem.location) {
3261 unsigned xfb_mask = writemask & out->component_mask;
3262
3263 /*fprintf(stdout, "output%u: buffer=%u, offset=%u, location=%u, "
3264 "component_offset=%u, component_mask=0x%x, xfb_mask=0x%x, slots=%u\n",
3265 i, out->buffer,
3266 out->offset,
3267 out->location,
3268 out->component_offset,
3269 out->component_mask,
3270 xfb_mask, sem.num_slots);*/
3271
3272 while (xfb_mask) {
3273 int start, count;
3274 u_bit_scan_consecutive_range(&xfb_mask, &start, &count);
3275
3276 xfb[start / 2].out[start % 2].num_components = count;
3277 xfb[start / 2].out[start % 2].buffer = out->buffer;
3278 /* out->offset is relative to the first stored xfb component */
3279 /* start is relative to component 0 */
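/* Worked example with hypothetical values: capturing only .zw of a vec4
 * at byte offset 8 gives out->offset = 8, out->component_offset = 2 and
 * start = 2, so the dword offset stored here is 8 / 4 - 2 + 2 = 2.
 */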
3280 xfb[start / 2].out[start % 2].offset =
3281 out->offset / 4 - out->component_offset + start;
3282
3283 progress = true;
3284 }
3285 }
3286 }
3287
3288 nir_intrinsic_set_io_xfb(intr, xfb[0]);
3289 nir_intrinsic_set_io_xfb2(intr, xfb[1]);
3290 }
3291 }
3292
3293 nir_metadata_preserve(impl, nir_metadata_all);
3294 return progress;
3295 }
3296
3297 static int
3298 type_size_vec4(const struct glsl_type *type, bool bindless)
3299 {
3300 return glsl_count_attribute_slots(type, false);
3301 }
3302
3303 /**
3304 * This runs all compiler passes needed to lower IO, lower indirect IO access,
3305 * set transform feedback info in IO intrinsics, and clean up the IR.
3306 *
3307 * \param renumber_vs_inputs
3308 * Set to true to remove holes between VS inputs, which is safe in any
3309 * shader linker that can handle renumbered inputs. Set to false to keep
3310 * holes between VS inputs, which is recommended for gallium drivers so
3311 * that the mapping of vertex elements to VS inputs expected by gallium
3312 * frontends is not broken.
3313 */
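
/* Illustrative sketch: if dead-code elimination removes one of a VS's inputs,
 * renumber_vs_inputs = true lets nir_recompute_io_bases close the resulting
 * hole in the input bases, while renumber_vs_inputs = false leaves the
 * remaining bases (and thus the vertex-element mapping) untouched.
 */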
3314 void
3315 nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
3316 {
3317 if (nir->info.stage == MESA_SHADER_COMPUTE)
3318 return;
3319
3320 bool has_indirect_inputs =
3321 (nir->options->support_indirect_inputs >> nir->info.stage) & 0x1;
3322
3323 /* Transform feedback requires that indirect outputs are lowered. */
3324 bool has_indirect_outputs =
3325 (nir->options->support_indirect_outputs >> nir->info.stage) & 0x1 &&
3326 nir->xfb_info == NULL;
3327
3328 /* TODO: Sorting variables by location is required due to some bug
3329 * in nir_lower_io_to_temporaries. If variables are not sorted,
3330 * dEQP-GLES31.functional.separate_shader.random.0 fails.
3331 *
3332 * This isn't needed if nir_assign_io_var_locations is called because it
3333 * also sorts variables. However, if IO is lowered sooner than that, we
3334 * must sort explicitly here to get what nir_assign_io_var_locations does.
3335 */
3336 unsigned varying_var_mask =
3337 (nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0) |
3338 (nir->info.stage != MESA_SHADER_FRAGMENT ? nir_var_shader_out : 0);
3339 nir_sort_variables_by_location(nir, varying_var_mask);
3340
3341 if (!has_indirect_inputs || !has_indirect_outputs) {
3342 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
3343 nir_shader_get_entrypoint(nir), !has_indirect_outputs,
3344 !has_indirect_inputs);
3345
3346 /* We need to lower all the copy_deref's introduced by
3347 * nir_lower_io_to_temporaries before calling nir_lower_io.
3348 */
3349 NIR_PASS_V(nir, nir_split_var_copies);
3350 NIR_PASS_V(nir, nir_lower_var_copies);
3351 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
3352
3353 /* This is partially redundant with nir_lower_io_to_temporaries.
3354 * The problem is that nir_lower_io_to_temporaries doesn't handle TCS.
3355 */
3356 if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
3357 NIR_PASS(_, nir, nir_lower_indirect_derefs,
3358 (!has_indirect_inputs ? nir_var_shader_in : 0) |
3359 (!has_indirect_outputs ? nir_var_shader_out : 0), UINT32_MAX);
3360 }
3361 }
3362
3363 /* The correct lower_64bit_to_32 flag is required by st/mesa depending
3364 * on whether the GLSL linker lowers IO or not. Setting the wrong flag
3365 * would break 64-bit vertex attribs for GLSL.
3366 */
3367 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
3368 type_size_vec4,
3369 (renumber_vs_inputs ? nir_lower_io_lower_64bit_to_32_new :
3370 nir_lower_io_lower_64bit_to_32) |
3371 nir_lower_io_use_interpolated_input_intrinsics);
3372
3373 /* nir_io_add_const_offset_to_base needs actual constants. */
3374 NIR_PASS_V(nir, nir_opt_constant_folding);
3375 NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
3376
3377 /* Lower and remove dead derefs and variables to clean up the IR. */
3378 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3379 NIR_PASS_V(nir, nir_opt_dce);
3380 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3381
3382 /* If IO is lowered before var->data.driver_location is assigned, driver
3383 * locations are all 0, which means IO bases are all 0. It's not necessary
3384 * to set driver_location before lowering IO because the only thing that
3385 * identifies outputs is their semantic, and IO bases can always be
3386 * computed from the semantics.
3387 *
3388 * This assigns IO bases from scratch, using IO semantics to tell which
3389 * intrinsics refer to the same IO. If the bases already exist, they
3390 * will be reassigned, sorted by the semantic, and all holes removed.
3391 * This effectively canonicalizes all bases.
3392 *
3393 * This must be done after DCE to remove dead load_input intrinsics.
3394 */
3395 NIR_PASS_V(nir, nir_recompute_io_bases,
3396 (nir->info.stage != MESA_SHADER_VERTEX || renumber_vs_inputs ?
3397 nir_var_shader_in : 0) | nir_var_shader_out);
3398
3399 if (nir->xfb_info)
3400 NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
3401
3402 if (nir->options->lower_mediump_io)
3403 nir->options->lower_mediump_io(nir);
3404
3405 nir->info.io_lowered = true;
3406 }
3407