1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Connor Abbott (cwabbott0@gmail.com)
25  *    Jason Ekstrand (jason@jlekstrand.net)
26  *
27  */
28 
29 /*
30  * This lowering pass converts loads and stores of input/output variables
31  * into the actual input/output intrinsics.
32  */
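/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * after this pass, a non-flat fragment-shader input that was read as
 *
 *    val = load_deref(deref_var)
 *
 * is instead expressed with offset-based intrinsics, roughly
 *
 *    bary = load_barycentric_pixel(interp_mode)
 *    val  = load_interpolated_input(bary, offset) [.base, .component, ...]
 *
 * as built by emit_load() below.  Non-interpolated inputs become load_input /
 * load_per_vertex_input, and outputs use the corresponding store intrinsics.
 */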
33 
34 #include "nir.h"
35 #include "nir_builder.h"
36 #include "nir_deref.h"
37 #include "nir_xfb_info.h"
38 
39 #include "util/u_math.h"
40 
41 struct lower_io_state {
42    void *dead_ctx;
43    nir_builder builder;
44    int (*type_size)(const struct glsl_type *type, bool);
45    nir_variable_mode modes;
46    nir_lower_io_options options;
47 };
48 
49 static nir_intrinsic_op
50 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
51 {
52    switch (deref_op) {
53 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
54    OP(atomic_exchange)
55    OP(atomic_comp_swap)
56    OP(atomic_add)
57    OP(atomic_imin)
58    OP(atomic_umin)
59    OP(atomic_imax)
60    OP(atomic_umax)
61    OP(atomic_and)
62    OP(atomic_or)
63    OP(atomic_xor)
64    OP(atomic_fadd)
65    OP(atomic_fmin)
66    OP(atomic_fmax)
67    OP(atomic_fcomp_swap)
68 #undef OP
69    default:
70       unreachable("Invalid SSBO atomic");
71    }
72 }
73 
74 static nir_intrinsic_op
75 global_atomic_for_deref(nir_address_format addr_format,
76                         nir_intrinsic_op deref_op)
77 {
78    switch (deref_op) {
79 #define OP(O) case nir_intrinsic_deref_##O:              \
80    if (addr_format != nir_address_format_2x32bit_global) \
81       return nir_intrinsic_global_##O;                   \
82    else                                                  \
83       return nir_intrinsic_global_##O##_2x32;
84    OP(atomic_exchange)
85    OP(atomic_comp_swap)
86    OP(atomic_add)
87    OP(atomic_imin)
88    OP(atomic_umin)
89    OP(atomic_imax)
90    OP(atomic_umax)
91    OP(atomic_and)
92    OP(atomic_or)
93    OP(atomic_xor)
94    OP(atomic_fadd)
95    OP(atomic_fmin)
96    OP(atomic_fmax)
97    OP(atomic_fcomp_swap)
98 #undef OP
99    default:
100       unreachable("Invalid global atomic");
101    }
102 }
103 
104 static nir_intrinsic_op
105 shared_atomic_for_deref(nir_intrinsic_op deref_op)
106 {
107    switch (deref_op) {
108 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
109    OP(atomic_exchange)
110    OP(atomic_comp_swap)
111    OP(atomic_add)
112    OP(atomic_imin)
113    OP(atomic_umin)
114    OP(atomic_imax)
115    OP(atomic_umax)
116    OP(atomic_and)
117    OP(atomic_or)
118    OP(atomic_xor)
119    OP(atomic_fadd)
120    OP(atomic_fmin)
121    OP(atomic_fmax)
122    OP(atomic_fcomp_swap)
123 #undef OP
124    default:
125       unreachable("Invalid shared atomic");
126    }
127 }
128 
129 static nir_intrinsic_op
130 task_payload_atomic_for_deref(nir_intrinsic_op deref_op)
131 {
132    switch (deref_op) {
133 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_task_payload_##O;
134    OP(atomic_exchange)
135    OP(atomic_comp_swap)
136    OP(atomic_add)
137    OP(atomic_imin)
138    OP(atomic_umin)
139    OP(atomic_imax)
140    OP(atomic_umax)
141    OP(atomic_and)
142    OP(atomic_or)
143    OP(atomic_xor)
144    OP(atomic_fadd)
145    OP(atomic_fmin)
146    OP(atomic_fmax)
147    OP(atomic_fcomp_swap)
148 #undef OP
149    default:
150       unreachable("Invalid task payload atomic");
151    }
152 }
153 
154 void
155 nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
156                          unsigned *size,
157                          int (*type_size)(const struct glsl_type *, bool))
158 {
159    unsigned location = 0;
160 
161    nir_foreach_variable_with_modes(var, shader, mode) {
162       var->data.driver_location = location;
163       bool bindless_type_size = var->data.mode == nir_var_shader_in ||
164                                 var->data.mode == nir_var_shader_out ||
165                                 var->data.bindless;
166       location += type_size(var->type, bindless_type_size);
167    }
168 
169    *size = location;
170 }
171 
172 /**
173  * Some inputs and outputs are arrayed, meaning that there is an extra level
174  * of array indexing to handle mismatches between the shader interface and the
175  * dispatch pattern of the shader.  For instance, geometry shaders are
176  * executed per-primitive while their inputs and outputs are specified
177  * per-vertex so all inputs and outputs have to be additionally indexed with
178  * the vertex index within the primitive.
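 *
 * For example (illustrative): a geometry shader input declared in GLSL as
 * "in vec4 color[];" is arrayed.  color[v] selects the value for vertex v of
 * the input primitive, and after lowering the access becomes a
 * load_per_vertex_input intrinsic whose first source is that vertex index
 * (see emit_load() below).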
179  */
180 bool
181 nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
182 {
183    if (var->data.patch || !glsl_type_is_array(var->type))
184       return false;
185 
186    if (stage == MESA_SHADER_MESH) {
187       /* NV_mesh_shader: this is a flat array for the whole workgroup. */
188       if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
189          return var->data.per_primitive;
190    }
191 
192    if (var->data.mode == nir_var_shader_in)
193       return stage == MESA_SHADER_GEOMETRY ||
194              stage == MESA_SHADER_TESS_CTRL ||
195              stage == MESA_SHADER_TESS_EVAL;
196 
197    if (var->data.mode == nir_var_shader_out)
198       return stage == MESA_SHADER_TESS_CTRL ||
199              stage == MESA_SHADER_MESH;
200 
201    return false;
202 }
203 
204 static unsigned get_number_of_slots(struct lower_io_state *state,
205                                     const nir_variable *var)
206 {
207    const struct glsl_type *type = var->type;
208 
209    if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
210       assert(glsl_type_is_array(type));
211       type = glsl_get_array_element(type);
212    }
213 
214    /* NV_mesh_shader:
215     * PRIMITIVE_INDICES is a flat array, not a proper arrayed output,
216     * as opposed to D3D-style mesh shaders where it's addressed by
217     * the primitive index.
218     * Prevent assigning several slots to primitive indices
219     * by treating the whole flat array as a single slot.
220     */
221    if (state->builder.shader->info.stage == MESA_SHADER_MESH &&
222        var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
223        !nir_is_arrayed_io(var, state->builder.shader->info.stage))
224       return 1;
225 
226    return state->type_size(type, var->data.bindless);
227 }
228 
229 static nir_ssa_def *
230 get_io_offset(nir_builder *b, nir_deref_instr *deref,
231               nir_ssa_def **array_index,
232               int (*type_size)(const struct glsl_type *, bool),
233               unsigned *component, bool bts)
234 {
235    nir_deref_path path;
236    nir_deref_path_init(&path, deref, NULL);
237 
238    assert(path.path[0]->deref_type == nir_deref_type_var);
239    nir_deref_instr **p = &path.path[1];
240 
241    /* For arrayed I/O (e.g., per-vertex input arrays in geometry shaders),
242     * skip the outermost array index.  Process the rest normally.
243     */
244    if (array_index != NULL) {
245       assert((*p)->deref_type == nir_deref_type_array);
246       *array_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
247       p++;
248    }
249 
250    if (path.path[0]->var->data.compact) {
251       assert((*p)->deref_type == nir_deref_type_array);
252       assert(glsl_type_is_scalar((*p)->type));
253 
254       /* We always lower indirect dereferences for "compact" array vars. */
255       const unsigned index = nir_src_as_uint((*p)->arr.index);
256       const unsigned total_offset = *component + index;
257       const unsigned slot_offset = total_offset / 4;
258       *component = total_offset % 4;
259       return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
260    }
261 
262    /* Just emit code and let constant-folding go to town */
263    nir_ssa_def *offset = nir_imm_int(b, 0);
264 
265    for (; *p; p++) {
266       if ((*p)->deref_type == nir_deref_type_array) {
267          unsigned size = type_size((*p)->type, bts);
268 
269          nir_ssa_def *mul =
270             nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
271 
272          offset = nir_iadd(b, offset, mul);
273       } else if ((*p)->deref_type == nir_deref_type_struct) {
274          /* p starts at path[1], so this is safe */
275          nir_deref_instr *parent = *(p - 1);
276 
277          unsigned field_offset = 0;
278          for (unsigned i = 0; i < (*p)->strct.index; i++) {
279             field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
280          }
281          offset = nir_iadd_imm(b, offset, field_offset);
282       } else {
283          unreachable("Unsupported deref type");
284       }
285    }
286 
287    nir_deref_path_finish(&path);
288 
289    return offset;
290 }
291 
292 static nir_ssa_def *
293 emit_load(struct lower_io_state *state,
294           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
295           unsigned component, unsigned num_components, unsigned bit_size,
296           nir_alu_type dest_type)
297 {
298    nir_builder *b = &state->builder;
299    const nir_shader *nir = b->shader;
300    nir_variable_mode mode = var->data.mode;
301    nir_ssa_def *barycentric = NULL;
302 
303    nir_intrinsic_op op;
304    switch (mode) {
305    case nir_var_shader_in:
306       if (nir->info.stage == MESA_SHADER_FRAGMENT &&
307           nir->options->use_interpolated_input_intrinsics &&
308           var->data.interpolation != INTERP_MODE_FLAT &&
309           !var->data.per_primitive) {
310          if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
311             assert(array_index != NULL);
312             op = nir_intrinsic_load_input_vertex;
313          } else {
314             assert(array_index == NULL);
315 
316             nir_intrinsic_op bary_op;
317             if (var->data.sample ||
318                 (state->options & nir_lower_io_force_sample_interpolation))
319                bary_op = nir_intrinsic_load_barycentric_sample;
320             else if (var->data.centroid)
321                bary_op = nir_intrinsic_load_barycentric_centroid;
322             else
323                bary_op = nir_intrinsic_load_barycentric_pixel;
324 
325             barycentric = nir_load_barycentric(&state->builder, bary_op,
326                                                var->data.interpolation);
327             op = nir_intrinsic_load_interpolated_input;
328          }
329       } else {
330          op = array_index ? nir_intrinsic_load_per_vertex_input :
331                             nir_intrinsic_load_input;
332       }
333       break;
334    case nir_var_shader_out:
335       op = !array_index            ? nir_intrinsic_load_output :
336            var->data.per_primitive ? nir_intrinsic_load_per_primitive_output :
337                                      nir_intrinsic_load_per_vertex_output;
338       break;
339    case nir_var_uniform:
340       op = nir_intrinsic_load_uniform;
341       break;
342    default:
343       unreachable("Unknown variable mode");
344    }
345 
346    nir_intrinsic_instr *load =
347       nir_intrinsic_instr_create(state->builder.shader, op);
348    load->num_components = num_components;
349 
350    nir_intrinsic_set_base(load, var->data.driver_location);
351    if (mode == nir_var_shader_in || mode == nir_var_shader_out)
352       nir_intrinsic_set_component(load, component);
353 
354    if (load->intrinsic == nir_intrinsic_load_uniform)
355       nir_intrinsic_set_range(load,
356                               state->type_size(var->type, var->data.bindless));
357 
358    if (nir_intrinsic_has_access(load))
359       nir_intrinsic_set_access(load, var->data.access);
360 
361    nir_intrinsic_set_dest_type(load, dest_type);
362 
363    if (load->intrinsic != nir_intrinsic_load_uniform) {
364       nir_io_semantics semantics = {0};
365       semantics.location = var->data.location;
366       semantics.num_slots = get_number_of_slots(state, var);
367       semantics.fb_fetch_output = var->data.fb_fetch_output;
368       semantics.medium_precision =
369          var->data.precision == GLSL_PRECISION_MEDIUM ||
370          var->data.precision == GLSL_PRECISION_LOW;
371       nir_intrinsic_set_io_semantics(load, semantics);
372    }
373 
374    if (array_index) {
375       load->src[0] = nir_src_for_ssa(array_index);
376       load->src[1] = nir_src_for_ssa(offset);
377    } else if (barycentric) {
378       load->src[0] = nir_src_for_ssa(barycentric);
379       load->src[1] = nir_src_for_ssa(offset);
380    } else {
381       load->src[0] = nir_src_for_ssa(offset);
382    }
383 
384    nir_ssa_dest_init(&load->instr, &load->dest,
385                      num_components, bit_size, NULL);
386    nir_builder_instr_insert(b, &load->instr);
387 
388    return &load->dest.ssa;
389 }
390 
391 static nir_ssa_def *
392 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
393            nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
394            unsigned component, const struct glsl_type *type)
395 {
396    assert(intrin->dest.is_ssa);
397    if (intrin->dest.ssa.bit_size == 64 &&
398        (state->options & nir_lower_io_lower_64bit_to_32)) {
399       nir_builder *b = &state->builder;
400 
401       const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
402 
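      /* Each 64-bit component is loaded as two 32-bit channels (at most two
       * 64-bit components per vec4 slot) and repacked with nir_pack_64_2x32.
       */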
403       nir_ssa_def *comp64[4];
404       assert(component == 0 || component == 2);
405       unsigned dest_comp = 0;
406       while (dest_comp < intrin->dest.ssa.num_components) {
407          const unsigned num_comps =
408             MIN2(intrin->dest.ssa.num_components - dest_comp,
409                  (4 - component) / 2);
410 
411          nir_ssa_def *data32 =
412             emit_load(state, array_index, var, offset, component,
413                       num_comps * 2, 32, nir_type_uint32);
414          for (unsigned i = 0; i < num_comps; i++) {
415             comp64[dest_comp + i] =
416                nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
417          }
418 
419          /* Only the first load has a component offset */
420          component = 0;
421          dest_comp += num_comps;
422          offset = nir_iadd_imm(b, offset, slot_size);
423       }
424 
425       return nir_vec(b, comp64, intrin->dest.ssa.num_components);
426    } else if (intrin->dest.ssa.bit_size == 1) {
427       /* Booleans are 32-bit */
428       assert(glsl_type_is_boolean(type));
429       return nir_b2b1(&state->builder,
430                       emit_load(state, array_index, var, offset, component,
431                                 intrin->dest.ssa.num_components, 32,
432                                 nir_type_bool32));
433    } else {
434       return emit_load(state, array_index, var, offset, component,
435                        intrin->dest.ssa.num_components,
436                        intrin->dest.ssa.bit_size,
437                        nir_get_nir_type_for_glsl_type(type));
438    }
439 }
440 
441 static void
442 emit_store(struct lower_io_state *state, nir_ssa_def *data,
443            nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
444            unsigned component, unsigned num_components,
445            nir_component_mask_t write_mask, nir_alu_type src_type)
446 {
447    nir_builder *b = &state->builder;
448 
449    assert(var->data.mode == nir_var_shader_out);
450    nir_intrinsic_op op =
451       !array_index            ? nir_intrinsic_store_output :
452       var->data.per_primitive ? nir_intrinsic_store_per_primitive_output :
453                                 nir_intrinsic_store_per_vertex_output;
454 
455    nir_intrinsic_instr *store =
456       nir_intrinsic_instr_create(state->builder.shader, op);
457    store->num_components = num_components;
458 
459    store->src[0] = nir_src_for_ssa(data);
460 
461    nir_intrinsic_set_base(store, var->data.driver_location);
462    nir_intrinsic_set_component(store, component);
463    nir_intrinsic_set_src_type(store, src_type);
464 
465    nir_intrinsic_set_write_mask(store, write_mask);
466 
467    if (nir_intrinsic_has_access(store))
468       nir_intrinsic_set_access(store, var->data.access);
469 
470    if (array_index)
471       store->src[1] = nir_src_for_ssa(array_index);
472 
473    store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);
474 
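   /* For geometry shaders, io_semantics.gs_streams packs one 2-bit stream
    * index per written component.
    */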
475    unsigned gs_streams = 0;
476    if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
477       if (var->data.stream & NIR_STREAM_PACKED) {
478          gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
479       } else {
480          assert(var->data.stream < 4);
481          gs_streams = 0;
482          for (unsigned i = 0; i < num_components; ++i)
483             gs_streams |= var->data.stream << (2 * i);
484       }
485    }
486 
487    nir_io_semantics semantics = {0};
488    semantics.location = var->data.location;
489    semantics.num_slots = get_number_of_slots(state, var);
490    semantics.dual_source_blend_index = var->data.index;
491    semantics.gs_streams = gs_streams;
492    semantics.medium_precision =
493       var->data.precision == GLSL_PRECISION_MEDIUM ||
494       var->data.precision == GLSL_PRECISION_LOW;
495    semantics.per_view = var->data.per_view;
496    semantics.invariant = var->data.invariant;
497 
498    nir_intrinsic_set_io_semantics(store, semantics);
499 
500    nir_builder_instr_insert(b, &store->instr);
501 }
502 
503 static void
504 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
505             nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
506             unsigned component, const struct glsl_type *type)
507 {
508    assert(intrin->src[1].is_ssa);
509    if (intrin->src[1].ssa->bit_size == 64 &&
510        (state->options & nir_lower_io_lower_64bit_to_32)) {
511       nir_builder *b = &state->builder;
512 
513       const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
514 
515       assert(component == 0 || component == 2);
516       unsigned src_comp = 0;
517       nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
518       while (src_comp < intrin->num_components) {
519          const unsigned num_comps =
520             MIN2(intrin->num_components - src_comp,
521                  (4 - component) / 2);
522 
523          if (write_mask & BITFIELD_MASK(num_comps)) {
524             nir_ssa_def *data =
525                nir_channels(b, intrin->src[1].ssa,
526                             BITFIELD_RANGE(src_comp, num_comps));
527             nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);
528 
529             nir_component_mask_t write_mask32 = 0;
530             for (unsigned i = 0; i < num_comps; i++) {
531                if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
532                   write_mask32 |= 3 << (i * 2);
533             }
534 
535             emit_store(state, data32, array_index, var, offset,
536                        component, data32->num_components, write_mask32,
537                        nir_type_uint32);
538          }
539 
540          /* Only the first store has a component offset */
541          component = 0;
542          src_comp += num_comps;
543          write_mask >>= num_comps;
544          offset = nir_iadd_imm(b, offset, slot_size);
545       }
546    } else if (intrin->src[1].ssa->bit_size == 1) {
547       /* Booleans are 32-bit */
548       assert(glsl_type_is_boolean(type));
549       nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
550       emit_store(state, b32_val, array_index, var, offset,
551                  component, intrin->num_components,
552                  nir_intrinsic_write_mask(intrin),
553                  nir_type_bool32);
554    } else {
555       emit_store(state, intrin->src[1].ssa, array_index, var, offset,
556                  component, intrin->num_components,
557                  nir_intrinsic_write_mask(intrin),
558                  nir_get_nir_type_for_glsl_type(type));
559    }
560 }
561 
562 static nir_ssa_def *
563 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
564                      nir_variable *var, nir_ssa_def *offset, unsigned component,
565                      const struct glsl_type *type)
566 {
567    nir_builder *b = &state->builder;
568    assert(var->data.mode == nir_var_shader_in);
569 
570    /* Ignore interpolateAt() for flat variables - flat is flat. Lower
571     * interpolateAtVertex() for explicit variables.
572     */
573    if (var->data.interpolation == INTERP_MODE_FLAT ||
574        var->data.interpolation == INTERP_MODE_EXPLICIT) {
575       nir_ssa_def *vertex_index = NULL;
576 
577       if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
578          assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
579          vertex_index = intrin->src[1].ssa;
580       }
581 
582       return lower_load(intrin, state, vertex_index, var, offset, component, type);
583    }
584 
585    /* None of the supported APIs allow interpolation on 64-bit things */
586    assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);
587 
588    nir_intrinsic_op bary_op;
589    switch (intrin->intrinsic) {
590    case nir_intrinsic_interp_deref_at_centroid:
591       bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
592                 nir_intrinsic_load_barycentric_sample :
593                 nir_intrinsic_load_barycentric_centroid;
594       break;
595    case nir_intrinsic_interp_deref_at_sample:
596       bary_op = nir_intrinsic_load_barycentric_at_sample;
597       break;
598    case nir_intrinsic_interp_deref_at_offset:
599       bary_op = nir_intrinsic_load_barycentric_at_offset;
600       break;
601    default:
602       unreachable("Bogus interpolateAt() intrinsic.");
603    }
604 
605    nir_intrinsic_instr *bary_setup =
606       nir_intrinsic_instr_create(state->builder.shader, bary_op);
607 
608    nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
609    nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
610 
611    if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
612        intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
613        intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
614       nir_src_copy(&bary_setup->src[0], &intrin->src[1]);
615 
616    nir_builder_instr_insert(b, &bary_setup->instr);
617 
618    nir_io_semantics semantics = {0};
619    semantics.location = var->data.location;
620    semantics.num_slots = get_number_of_slots(state, var);
621    semantics.medium_precision =
622       var->data.precision == GLSL_PRECISION_MEDIUM ||
623       var->data.precision == GLSL_PRECISION_LOW;
624 
625    assert(intrin->dest.is_ssa);
626    nir_ssa_def *load =
627       nir_load_interpolated_input(&state->builder,
628                                   intrin->dest.ssa.num_components,
629                                   intrin->dest.ssa.bit_size,
630                                   &bary_setup->dest.ssa,
631                                   offset,
632                                   .base = var->data.driver_location,
633                                   .component = component,
634                                   .io_semantics = semantics);
635 
636    return load;
637 }
638 
639 static bool
640 nir_lower_io_block(nir_block *block,
641                    struct lower_io_state *state)
642 {
643    nir_builder *b = &state->builder;
644    const nir_shader_compiler_options *options = b->shader->options;
645    bool progress = false;
646 
647    nir_foreach_instr_safe(instr, block) {
648       if (instr->type != nir_instr_type_intrinsic)
649          continue;
650 
651       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
652 
653       switch (intrin->intrinsic) {
654       case nir_intrinsic_load_deref:
655       case nir_intrinsic_store_deref:
656          /* We can lower the I/O for this NIR intrinsic */
657          break;
658       case nir_intrinsic_interp_deref_at_centroid:
659       case nir_intrinsic_interp_deref_at_sample:
660       case nir_intrinsic_interp_deref_at_offset:
661       case nir_intrinsic_interp_deref_at_vertex:
662          /* We can optionally lower these to load_interpolated_input */
663          if (options->use_interpolated_input_intrinsics ||
664              options->lower_interpolate_at)
665             break;
666          FALLTHROUGH;
667       default:
668          /* We can't lower the I/O for this NIR intrinsic, so skip it */
669          continue;
670       }
671 
672       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
673       if (!nir_deref_mode_is_one_of(deref, state->modes))
674          continue;
675 
676       nir_variable *var = nir_deref_instr_get_variable(deref);
677 
678       b->cursor = nir_before_instr(instr);
679 
680       const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);
681 
682       nir_ssa_def *offset;
683       nir_ssa_def *array_index = NULL;
684       unsigned component_offset = var->data.location_frac;
685       bool bindless_type_size = var->data.mode == nir_var_shader_in ||
686                                 var->data.mode == nir_var_shader_out ||
687                                 var->data.bindless;
688 
689       if (nir_deref_instr_is_known_out_of_bounds(deref)) {
690          /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
691           *
692           *    In the subsections described above for array, vector, matrix and
693           *    structure accesses, any out-of-bounds access produced undefined
694           *    behavior....
695           *    Out-of-bounds reads return undefined values, which
696           *    include values from other variables of the active program or zero.
697           *    Out-of-bounds writes may be discarded or overwrite
698           *    other variables of the active program.
699           *
700           * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
701           * for reads.
702           *
703           * Otherwise get_io_offset would return an out-of-bounds offset, which
704           * may result in out-of-bounds loads/stores of inputs/outputs and could
705           * cause issues in drivers down the line.
706           */
707          if (intrin->intrinsic != nir_intrinsic_store_deref) {
708             nir_ssa_def *zero =
709                nir_imm_zero(b, intrin->dest.ssa.num_components,
710                              intrin->dest.ssa.bit_size);
711             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
712                                   zero);
713          }
714 
715          nir_instr_remove(&intrin->instr);
716          progress = true;
717          continue;
718       }
719 
720       offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
721                              state->type_size, &component_offset,
722                              bindless_type_size);
723 
724       nir_ssa_def *replacement = NULL;
725 
726       switch (intrin->intrinsic) {
727       case nir_intrinsic_load_deref:
728          replacement = lower_load(intrin, state, array_index, var, offset,
729                                   component_offset, deref->type);
730          break;
731 
732       case nir_intrinsic_store_deref:
733          lower_store(intrin, state, array_index, var, offset,
734                      component_offset, deref->type);
735          break;
736 
737       case nir_intrinsic_interp_deref_at_centroid:
738       case nir_intrinsic_interp_deref_at_sample:
739       case nir_intrinsic_interp_deref_at_offset:
740       case nir_intrinsic_interp_deref_at_vertex:
741          assert(array_index == NULL);
742          replacement = lower_interpolate_at(intrin, state, var, offset,
743                                             component_offset, deref->type);
744          break;
745 
746       default:
747          continue;
748       }
749 
750       if (replacement) {
751          nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
752                                   replacement);
753       }
754       nir_instr_remove(&intrin->instr);
755       progress = true;
756    }
757 
758    return progress;
759 }
760 
761 static bool
762 nir_lower_io_impl(nir_function_impl *impl,
763                   nir_variable_mode modes,
764                   int (*type_size)(const struct glsl_type *, bool),
765                   nir_lower_io_options options)
766 {
767    struct lower_io_state state;
768    bool progress = false;
769 
770    nir_builder_init(&state.builder, impl);
771    state.dead_ctx = ralloc_context(NULL);
772    state.modes = modes;
773    state.type_size = type_size;
774    state.options = options;
775 
776    ASSERTED nir_variable_mode supported_modes =
777       nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
778    assert(!(modes & ~supported_modes));
779 
780    nir_foreach_block(block, impl) {
781       progress |= nir_lower_io_block(block, &state);
782    }
783 
784    ralloc_free(state.dead_ctx);
785 
786    nir_metadata_preserve(impl, nir_metadata_none);
787 
788    return progress;
789 }
790 
791 /** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
792  *
793  * This pass is intended to be used for cross-stage shader I/O and driver-
794  * managed uniforms to turn deref-based access into a simpler model using
795  * locations or offsets.  For fragment shader inputs, it can optionally turn
796  * load_deref into an explicit interpolation using barycentrics coming from
797  * one of the load_barycentric_* intrinsics.  This pass requires that all
798  * deref chains are complete and contain no casts.
799  */
800 bool
801 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
802              int (*type_size)(const struct glsl_type *, bool),
803              nir_lower_io_options options)
804 {
805    bool progress = false;
806 
807    nir_foreach_function(function, shader) {
808       if (function->impl) {
809          progress |= nir_lower_io_impl(function->impl, modes,
810                                        type_size, options);
811       }
812    }
813 
814    return progress;
815 }
816 
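/* Example usage (hypothetical driver code, editorial addition): lower all
 * cross-stage I/O with a callback that counts vec4 slots.  The callback name
 * and the use of glsl_count_attribute_slots() are assumptions, not taken from
 * this file.
 *
 *    static int
 *    vec4_type_size(const struct glsl_type *type, bool bindless)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    ...
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 vec4_type_size, nir_lower_io_lower_64bit_to_32);
 */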
817 static unsigned
818 type_scalar_size_bytes(const struct glsl_type *type)
819 {
820    assert(glsl_type_is_vector_or_scalar(type) ||
821           glsl_type_is_matrix(type));
822    return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
823 }
824 
825 static nir_ssa_def *
826 build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
827                 nir_address_format addr_format,
828                 nir_variable_mode modes,
829                 nir_ssa_def *offset)
830 {
831    assert(offset->num_components == 1);
832 
833    switch (addr_format) {
834    case nir_address_format_32bit_global:
835    case nir_address_format_64bit_global:
836    case nir_address_format_32bit_offset:
837       assert(addr->bit_size == offset->bit_size);
838       assert(addr->num_components == 1);
839       return nir_iadd(b, addr, offset);
840 
841    case nir_address_format_2x32bit_global: {
842       assert(addr->num_components == 2);
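      /* The address is a 64-bit value split into two 32-bit words: add the
       * offset to the low word and propagate the carry (res_lo wrapped below
       * lo) into the high word.
       */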
843       nir_ssa_def *lo = nir_channel(b, addr, 0);
844       nir_ssa_def *hi = nir_channel(b, addr, 1);
845       nir_ssa_def *res_lo = nir_iadd(b, lo, offset);
846       nir_ssa_def *carry = nir_b2i32(b, nir_ult(b, res_lo, lo));
847       nir_ssa_def *res_hi = nir_iadd(b, hi, carry);
848       return nir_vec2(b, res_lo, res_hi);
849    }
850 
851    case nir_address_format_32bit_offset_as_64bit:
852       assert(addr->num_components == 1);
853       assert(offset->bit_size == 32);
854       return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));
855 
856    case nir_address_format_64bit_global_32bit_offset:
857    case nir_address_format_64bit_bounded_global:
858       assert(addr->num_components == 4);
859       assert(addr->bit_size == offset->bit_size);
860       return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);
861 
862    case nir_address_format_32bit_index_offset:
863       assert(addr->num_components == 2);
864       assert(addr->bit_size == offset->bit_size);
865       return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);
866 
867    case nir_address_format_32bit_index_offset_pack64:
868       assert(addr->num_components == 1);
869       assert(offset->bit_size == 32);
870       return nir_pack_64_2x32_split(b,
871                                     nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
872                                     nir_unpack_64_2x32_split_y(b, addr));
873 
874    case nir_address_format_vec2_index_32bit_offset:
875       assert(addr->num_components == 3);
876       assert(offset->bit_size == 32);
877       return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);
878 
879    case nir_address_format_62bit_generic:
880       assert(addr->num_components == 1);
881       assert(addr->bit_size == 64);
882       assert(offset->bit_size == 64);
883       if (!(modes & ~(nir_var_function_temp |
884                       nir_var_shader_temp |
885                       nir_var_mem_shared))) {
886          /* If we're sure it's one of these modes, we can do an easy 32-bit
887           * addition and don't need to bother with 64-bit math.
888           */
889          nir_ssa_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
890          nir_ssa_def *type = nir_unpack_64_2x32_split_y(b, addr);
891          addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
892          return nir_pack_64_2x32_split(b, addr32, type);
893       } else {
894          return nir_iadd(b, addr, offset);
895       }
896 
897    case nir_address_format_logical:
898       unreachable("Unsupported address format");
899    }
900    unreachable("Invalid address format");
901 }
902 
903 static unsigned
904 addr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format)
905 {
906    if (addr_format == nir_address_format_32bit_offset_as_64bit ||
907        addr_format == nir_address_format_32bit_index_offset_pack64)
908       return 32;
909    return addr->bit_size;
910 }
911 
912 static nir_ssa_def *
913 build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
914                     nir_address_format addr_format,
915                     nir_variable_mode modes,
916                     int64_t offset)
917 {
918    return build_addr_iadd(b, addr, addr_format, modes,
919                              nir_imm_intN_t(b, offset,
920                                             addr_get_offset_bit_size(addr, addr_format)));
921 }
922 
923 static nir_ssa_def *
924 build_addr_for_var(nir_builder *b, nir_variable *var,
925                    nir_address_format addr_format)
926 {
927    assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
928                             nir_var_mem_task_payload |
929                             nir_var_mem_global |
930                             nir_var_shader_temp | nir_var_function_temp |
931                             nir_var_mem_push_const | nir_var_mem_constant));
932 
933    const unsigned num_comps = nir_address_format_num_components(addr_format);
934    const unsigned bit_size = nir_address_format_bit_size(addr_format);
935 
936    switch (addr_format) {
937    case nir_address_format_2x32bit_global:
938    case nir_address_format_32bit_global:
939    case nir_address_format_64bit_global: {
940       nir_ssa_def *base_addr;
941       switch (var->data.mode) {
942       case nir_var_shader_temp:
943          base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
944          break;
945 
946       case nir_var_function_temp:
947          base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
948          break;
949 
950       case nir_var_mem_constant:
951          base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
952          break;
953 
954       case nir_var_mem_shared:
955          base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
956          break;
957 
958       case nir_var_mem_global:
959          base_addr = nir_load_global_base_ptr(b, num_comps, bit_size);
960          break;
961 
962       default:
963          unreachable("Unsupported variable mode");
964       }
965 
966       return build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
967                                     var->data.driver_location);
968    }
969 
970    case nir_address_format_32bit_offset:
971       assert(var->data.driver_location <= UINT32_MAX);
972       return nir_imm_int(b, var->data.driver_location);
973 
974    case nir_address_format_32bit_offset_as_64bit:
975       assert(var->data.driver_location <= UINT32_MAX);
976       return nir_imm_int64(b, var->data.driver_location);
977 
978    case nir_address_format_62bit_generic:
979       switch (var->data.mode) {
980       case nir_var_shader_temp:
981       case nir_var_function_temp:
982          assert(var->data.driver_location <= UINT32_MAX);
983          return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);
984 
985       case nir_var_mem_shared:
986          assert(var->data.driver_location <= UINT32_MAX);
987          return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);
988 
989       case nir_var_mem_global:
990          return nir_iadd_imm(b, nir_load_global_base_ptr(b, num_comps, bit_size),
991                                 var->data.driver_location);
992 
993       default:
994          unreachable("Unsupported variable mode");
995       }
996 
997    default:
998       unreachable("Unsupported address format");
999    }
1000 }
1001 
1002 static nir_ssa_def *
1003 build_runtime_addr_mode_check(nir_builder *b, nir_ssa_def *addr,
1004                               nir_address_format addr_format,
1005                               nir_variable_mode mode)
1006 {
1007    /* The compile-time check failed; do a run-time check */
1008    switch (addr_format) {
1009    case nir_address_format_62bit_generic: {
1010       assert(addr->num_components == 1);
1011       assert(addr->bit_size == 64);
1012       nir_ssa_def *mode_enum = nir_ushr(b, addr, nir_imm_int(b, 62));
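      /* The top two address bits encode the mode: 2 is shader/function temp,
       * 1 is shared memory, and 0 or 3 means global (see build_addr_for_var()).
       */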
1013       switch (mode) {
1014       case nir_var_function_temp:
1015       case nir_var_shader_temp:
1016          return nir_ieq_imm(b, mode_enum, 0x2);
1017 
1018       case nir_var_mem_shared:
1019          return nir_ieq_imm(b, mode_enum, 0x1);
1020 
1021       case nir_var_mem_global:
1022          return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
1023                            nir_ieq_imm(b, mode_enum, 0x3));
1024 
1025       default:
1026          unreachable("Invalid mode check intrinsic");
1027       }
1028    }
1029 
1030    default:
1031       unreachable("Unsupported address mode");
1032    }
1033 }
1034 
1035 unsigned
1036 nir_address_format_bit_size(nir_address_format addr_format)
1037 {
1038    switch (addr_format) {
1039    case nir_address_format_32bit_global:              return 32;
1040    case nir_address_format_2x32bit_global:            return 32;
1041    case nir_address_format_64bit_global:              return 64;
1042    case nir_address_format_64bit_global_32bit_offset: return 32;
1043    case nir_address_format_64bit_bounded_global:      return 32;
1044    case nir_address_format_32bit_index_offset:        return 32;
1045    case nir_address_format_32bit_index_offset_pack64: return 64;
1046    case nir_address_format_vec2_index_32bit_offset:   return 32;
1047    case nir_address_format_62bit_generic:             return 64;
1048    case nir_address_format_32bit_offset:              return 32;
1049    case nir_address_format_32bit_offset_as_64bit:     return 64;
1050    case nir_address_format_logical:                   return 32;
1051    }
1052    unreachable("Invalid address format");
1053 }
1054 
1055 unsigned
1056 nir_address_format_num_components(nir_address_format addr_format)
1057 {
1058    switch (addr_format) {
1059    case nir_address_format_32bit_global:              return 1;
1060    case nir_address_format_2x32bit_global:            return 2;
1061    case nir_address_format_64bit_global:              return 1;
1062    case nir_address_format_64bit_global_32bit_offset: return 4;
1063    case nir_address_format_64bit_bounded_global:      return 4;
1064    case nir_address_format_32bit_index_offset:        return 2;
1065    case nir_address_format_32bit_index_offset_pack64: return 1;
1066    case nir_address_format_vec2_index_32bit_offset:   return 3;
1067    case nir_address_format_62bit_generic:             return 1;
1068    case nir_address_format_32bit_offset:              return 1;
1069    case nir_address_format_32bit_offset_as_64bit:     return 1;
1070    case nir_address_format_logical:                   return 1;
1071    }
1072    unreachable("Invalid address format");
1073 }
1074 
1075 static nir_ssa_def *
1076 addr_to_index(nir_builder *b, nir_ssa_def *addr,
1077               nir_address_format addr_format)
1078 {
1079    switch (addr_format) {
1080    case nir_address_format_32bit_index_offset:
1081       assert(addr->num_components == 2);
1082       return nir_channel(b, addr, 0);
1083    case nir_address_format_32bit_index_offset_pack64:
1084       return nir_unpack_64_2x32_split_y(b, addr);
1085    case nir_address_format_vec2_index_32bit_offset:
1086       assert(addr->num_components == 3);
1087       return nir_channels(b, addr, 0x3);
1088    default: unreachable("Invalid address format");
1089    }
1090 }
1091 
1092 static nir_ssa_def *
1093 addr_to_offset(nir_builder *b, nir_ssa_def *addr,
1094                nir_address_format addr_format)
1095 {
1096    switch (addr_format) {
1097    case nir_address_format_32bit_index_offset:
1098       assert(addr->num_components == 2);
1099       return nir_channel(b, addr, 1);
1100    case nir_address_format_32bit_index_offset_pack64:
1101       return nir_unpack_64_2x32_split_x(b, addr);
1102    case nir_address_format_vec2_index_32bit_offset:
1103       assert(addr->num_components == 3);
1104       return nir_channel(b, addr, 2);
1105    case nir_address_format_32bit_offset:
1106       return addr;
1107    case nir_address_format_32bit_offset_as_64bit:
1108    case nir_address_format_62bit_generic:
1109       return nir_u2u32(b, addr);
1110    default:
1111       unreachable("Invalid address format");
1112    }
1113 }
1114 
1115 /** Returns true if the given address format resolves to a global address */
1116 static bool
1117 addr_format_is_global(nir_address_format addr_format,
1118                       nir_variable_mode mode)
1119 {
1120    if (addr_format == nir_address_format_62bit_generic)
1121       return mode == nir_var_mem_global;
1122 
1123    return addr_format == nir_address_format_32bit_global ||
1124           addr_format == nir_address_format_2x32bit_global ||
1125           addr_format == nir_address_format_64bit_global ||
1126           addr_format == nir_address_format_64bit_global_32bit_offset ||
1127           addr_format == nir_address_format_64bit_bounded_global;
1128 }
1129 
1130 static bool
1131 addr_format_is_offset(nir_address_format addr_format,
1132                       nir_variable_mode mode)
1133 {
1134    if (addr_format == nir_address_format_62bit_generic)
1135       return mode != nir_var_mem_global;
1136 
1137    return addr_format == nir_address_format_32bit_offset ||
1138           addr_format == nir_address_format_32bit_offset_as_64bit;
1139 }
1140 
1141 static nir_ssa_def *
1142 addr_to_global(nir_builder *b, nir_ssa_def *addr,
1143                nir_address_format addr_format)
1144 {
1145    switch (addr_format) {
1146    case nir_address_format_32bit_global:
1147    case nir_address_format_64bit_global:
1148    case nir_address_format_62bit_generic:
1149       assert(addr->num_components == 1);
1150       return addr;
1151 
1152    case nir_address_format_2x32bit_global:
1153       assert(addr->num_components == 2);
1154       return addr;
1155 
1156    case nir_address_format_64bit_global_32bit_offset:
1157    case nir_address_format_64bit_bounded_global:
1158       assert(addr->num_components == 4);
1159       return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
1160                          nir_u2u64(b, nir_channel(b, addr, 3)));
1161 
1162    case nir_address_format_32bit_index_offset:
1163    case nir_address_format_32bit_index_offset_pack64:
1164    case nir_address_format_vec2_index_32bit_offset:
1165    case nir_address_format_32bit_offset:
1166    case nir_address_format_32bit_offset_as_64bit:
1167    case nir_address_format_logical:
1168       unreachable("Cannot get a 64-bit address with this address format");
1169    }
1170 
1171    unreachable("Invalid address format");
1172 }
1173 
1174 static bool
1175 addr_format_needs_bounds_check(nir_address_format addr_format)
1176 {
1177    return addr_format == nir_address_format_64bit_bounded_global;
1178 }
1179 
1180 static nir_ssa_def *
1181 addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
1182                   nir_address_format addr_format, unsigned size)
1183 {
1184    assert(addr_format == nir_address_format_64bit_bounded_global);
1185    assert(addr->num_components == 4);
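   /* For this format, components 0-1 hold the 64-bit base address, component 2
    * the buffer size, and component 3 the byte offset; the access is in bounds
    * when offset + size <= buffer size.
    */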
1186    return nir_ige(b, nir_channel(b, addr, 2),
1187                      nir_iadd_imm(b, nir_channel(b, addr, 3), size));
1188 }
1189 
1190 static void
1191 nir_get_explicit_deref_range(nir_deref_instr *deref,
1192                              nir_address_format addr_format,
1193                              uint32_t *out_base,
1194                              uint32_t *out_range)
1195 {
1196    uint32_t base = 0;
1197    uint32_t range = glsl_get_explicit_size(deref->type, false);
1198 
1199    while (true) {
1200       nir_deref_instr *parent = nir_deref_instr_parent(deref);
1201 
1202       switch (deref->deref_type) {
1203       case nir_deref_type_array:
1204       case nir_deref_type_array_wildcard:
1205       case nir_deref_type_ptr_as_array: {
1206          const unsigned stride = nir_deref_instr_array_stride(deref);
1207          if (stride == 0)
1208             goto fail;
1209 
1210          if (!parent)
1211             goto fail;
1212 
1213          if (deref->deref_type != nir_deref_type_array_wildcard &&
1214              nir_src_is_const(deref->arr.index)) {
1215             base += stride * nir_src_as_uint(deref->arr.index);
1216          } else {
1217             if (glsl_get_length(parent->type) == 0)
1218                goto fail;
1219             range += stride * (glsl_get_length(parent->type) - 1);
1220          }
1221          break;
1222       }
1223 
1224       case nir_deref_type_struct: {
1225          if (!parent)
1226             goto fail;
1227 
1228          base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
1229          break;
1230       }
1231 
1232       case nir_deref_type_cast: {
1233          nir_instr *parent_instr = deref->parent.ssa->parent_instr;
1234 
1235          switch (parent_instr->type) {
1236          case nir_instr_type_load_const: {
1237             nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);
1238 
1239             switch (addr_format) {
1240             case nir_address_format_32bit_offset:
1241                base += load->value[1].u32;
1242                break;
1243             case nir_address_format_32bit_index_offset:
1244                base += load->value[1].u32;
1245                break;
1246             case nir_address_format_vec2_index_32bit_offset:
1247                base += load->value[2].u32;
1248                break;
1249             default:
1250                goto fail;
1251             }
1252 
1253             *out_base = base;
1254             *out_range = range;
1255             return;
1256          }
1257 
1258          case nir_instr_type_intrinsic: {
1259             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
1260             switch (intr->intrinsic) {
1261             case nir_intrinsic_load_vulkan_descriptor:
1262                /* Assume that a load_vulkan_descriptor won't contribute to an
1263                 * offset within the resource.
1264                 */
1265                break;
1266             default:
1267                goto fail;
1268             }
1269 
1270             *out_base = base;
1271             *out_range = range;
1272             return;
1273          }
1274 
1275          default:
1276             goto fail;
1277          }
1278       }
1279 
1280       default:
1281          goto fail;
1282       }
1283 
1284       deref = parent;
1285    }
1286 
1287 fail:
1288    *out_base = 0;
1289    *out_range = ~0;
1290 }
1291 
1292 static nir_variable_mode
1293 canonicalize_generic_modes(nir_variable_mode modes)
1294 {
1295    assert(modes != 0);
1296    if (util_bitcount(modes) == 1)
1297       return modes;
1298 
1299    assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
1300                       nir_var_mem_shared | nir_var_mem_global)));
1301 
1302    /* Canonicalize by converting shader_temp to function_temp */
1303    if (modes & nir_var_shader_temp) {
1304       modes &= ~nir_var_shader_temp;
1305       modes |= nir_var_function_temp;
1306    }
1307 
1308    return modes;
1309 }
1310 
1311 static nir_intrinsic_op
1312 get_store_global_op_from_addr_format(nir_address_format addr_format)
1313 {
1314    if (addr_format != nir_address_format_2x32bit_global)
1315       return nir_intrinsic_store_global;
1316    else
1317       return nir_intrinsic_store_global_2x32;
1318 }
1319 
1320 static nir_intrinsic_op
1321 get_load_global_op_from_addr_format(nir_address_format addr_format)
1322 {
1323    if (addr_format != nir_address_format_2x32bit_global)
1324       return nir_intrinsic_load_global;
1325    else
1326       return nir_intrinsic_load_global_2x32;
1327 }
1328 
1329 static nir_ssa_def *
1330 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
1331                        nir_ssa_def *addr, nir_address_format addr_format,
1332                        nir_variable_mode modes,
1333                        uint32_t align_mul, uint32_t align_offset,
1334                        unsigned num_components)
1335 {
1336    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1337    modes = canonicalize_generic_modes(modes);
1338 
1339    if (util_bitcount(modes) > 1) {
1340       if (addr_format_is_global(addr_format, modes)) {
1341          return build_explicit_io_load(b, intrin, addr, addr_format,
1342                                        nir_var_mem_global,
1343                                        align_mul, align_offset,
1344                                        num_components);
1345       } else if (modes & nir_var_function_temp) {
1346          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1347                                                       nir_var_function_temp));
1348          nir_ssa_def *res1 =
1349             build_explicit_io_load(b, intrin, addr, addr_format,
1350                                    nir_var_function_temp,
1351                                    align_mul, align_offset,
1352                                    num_components);
1353          nir_push_else(b, NULL);
1354          nir_ssa_def *res2 =
1355             build_explicit_io_load(b, intrin, addr, addr_format,
1356                                    modes & ~nir_var_function_temp,
1357                                    align_mul, align_offset,
1358                                    num_components);
1359          nir_pop_if(b, NULL);
1360          return nir_if_phi(b, res1, res2);
1361       } else {
1362          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1363                                                       nir_var_mem_shared));
1364          assert(modes & nir_var_mem_shared);
1365          nir_ssa_def *res1 =
1366             build_explicit_io_load(b, intrin, addr, addr_format,
1367                                    nir_var_mem_shared,
1368                                    align_mul, align_offset,
1369                                    num_components);
1370          nir_push_else(b, NULL);
1371          assert(modes & nir_var_mem_global);
1372          nir_ssa_def *res2 =
1373             build_explicit_io_load(b, intrin, addr, addr_format,
1374                                    nir_var_mem_global,
1375                                    align_mul, align_offset,
1376                                    num_components);
1377          nir_pop_if(b, NULL);
1378          return nir_if_phi(b, res1, res2);
1379       }
1380    }
1381 
1382    assert(util_bitcount(modes) == 1);
1383    const nir_variable_mode mode = modes;
1384 
1385    nir_intrinsic_op op;
1386    switch (intrin->intrinsic) {
1387    case nir_intrinsic_load_deref:
1388       switch (mode) {
1389       case nir_var_mem_ubo:
1390          if (addr_format == nir_address_format_64bit_global_32bit_offset)
1391             op = nir_intrinsic_load_global_constant_offset;
1392          else if (addr_format == nir_address_format_64bit_bounded_global)
1393             op = nir_intrinsic_load_global_constant_bounded;
1394          else if (addr_format_is_global(addr_format, mode))
1395             op = nir_intrinsic_load_global_constant;
1396          else
1397             op = nir_intrinsic_load_ubo;
1398          break;
1399       case nir_var_mem_ssbo:
1400          if (addr_format_is_global(addr_format, mode))
1401             op = nir_intrinsic_load_global;
1402          else
1403             op = nir_intrinsic_load_ssbo;
1404          break;
1405       case nir_var_mem_global:
1406          assert(addr_format_is_global(addr_format, mode));
1407          op = get_load_global_op_from_addr_format(addr_format);
1408          break;
1409       case nir_var_uniform:
1410          assert(addr_format_is_offset(addr_format, mode));
1411          assert(b->shader->info.stage == MESA_SHADER_KERNEL);
1412          op = nir_intrinsic_load_kernel_input;
1413          break;
1414       case nir_var_mem_shared:
1415          assert(addr_format_is_offset(addr_format, mode));
1416          op = nir_intrinsic_load_shared;
1417          break;
1418       case nir_var_mem_task_payload:
1419          assert(addr_format_is_offset(addr_format, mode));
1420          op = nir_intrinsic_load_task_payload;
1421          break;
1422       case nir_var_shader_temp:
1423       case nir_var_function_temp:
1424          if (addr_format_is_offset(addr_format, mode)) {
1425             op = nir_intrinsic_load_scratch;
1426          } else {
1427             assert(addr_format_is_global(addr_format, mode));
1428             op = get_load_global_op_from_addr_format(addr_format);
1429          }
1430          break;
1431       case nir_var_mem_push_const:
1432          assert(addr_format == nir_address_format_32bit_offset);
1433          op = nir_intrinsic_load_push_constant;
1434          break;
1435       case nir_var_mem_constant:
1436          if (addr_format_is_offset(addr_format, mode)) {
1437             op = nir_intrinsic_load_constant;
1438          } else {
1439             assert(addr_format_is_global(addr_format, mode));
1440             op = get_load_global_op_from_addr_format(addr_format);
1441          }
1442          break;
1443       default:
1444          unreachable("Unsupported explicit IO variable mode");
1445       }
1446       break;
1447 
1448    case nir_intrinsic_load_deref_block_intel:
1449       switch (mode) {
1450       case nir_var_mem_ssbo:
1451          if (addr_format_is_global(addr_format, mode))
1452             op = nir_intrinsic_load_global_block_intel;
1453          else
1454             op = nir_intrinsic_load_ssbo_block_intel;
1455          break;
1456       case nir_var_mem_global:
1457          op = nir_intrinsic_load_global_block_intel;
1458          break;
1459       case nir_var_mem_shared:
1460          op = nir_intrinsic_load_shared_block_intel;
1461          break;
1462       default:
1463          unreachable("Unsupported explicit IO variable mode");
1464       }
1465       break;
1466 
1467    default:
1468       unreachable("Invalid intrinsic");
1469    }
1470 
1471    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
1472 
1473    if (op == nir_intrinsic_load_global_constant_offset) {
1474       assert(addr_format == nir_address_format_64bit_global_32bit_offset);
1475       load->src[0] = nir_src_for_ssa(
1476          nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
1477       load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1478    } else if (op == nir_intrinsic_load_global_constant_bounded) {
1479       assert(addr_format == nir_address_format_64bit_bounded_global);
1480       load->src[0] = nir_src_for_ssa(
1481          nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
1482       load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1483       load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
1484    } else if (addr_format_is_global(addr_format, mode)) {
1485       load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1486    } else if (addr_format_is_offset(addr_format, mode)) {
1487       assert(addr->num_components == 1);
1488       load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1489    } else {
1490       load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1491       load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1492    }
1493 
1494    if (nir_intrinsic_has_access(load))
1495       nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
1496 
1497    if (op == nir_intrinsic_load_constant) {
1498       nir_intrinsic_set_base(load, 0);
1499       nir_intrinsic_set_range(load, b->shader->constant_data_size);
1500    } else if (mode == nir_var_mem_push_const) {
1501       /* Push-constant derefs must be chaseable back to the variable so we
1502        * can provide a base/range.
1503        */
1504       nir_variable *var = nir_deref_instr_get_variable(deref);
1505       nir_intrinsic_set_base(load, 0);
1506       nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
1507    }
1508 
1509    unsigned bit_size = intrin->dest.ssa.bit_size;
1510    if (bit_size == 1) {
1511       /* TODO: Make the native bool bit_size an option. */
1512       bit_size = 32;
1513    }
1514 
1515    if (nir_intrinsic_has_align(load))
1516       nir_intrinsic_set_align(load, align_mul, align_offset);
1517 
1518    if (nir_intrinsic_has_range_base(load)) {
1519       unsigned base, range;
1520       nir_get_explicit_deref_range(deref, addr_format, &base, &range);
1521       nir_intrinsic_set_range_base(load, base);
1522       nir_intrinsic_set_range(load, range);
1523    }
1524 
1525    assert(intrin->dest.is_ssa);
1526    load->num_components = num_components;
1527    nir_ssa_dest_init(&load->instr, &load->dest, num_components,
1528                      bit_size, NULL);
1529 
1530    assert(bit_size % 8 == 0);
1531 
1532    nir_ssa_def *result;
1533    if (addr_format_needs_bounds_check(addr_format) &&
1534        op != nir_intrinsic_load_global_constant_bounded) {
1535       /* We don't need to bounds-check global_constant_bounded because bounds
1536        * checking is handled by the intrinsic itself.
1537        *
1538        * The Vulkan spec for robustBufferAccess gives us quite a few options
1539        * as to what we can do with an OOB read.  Unfortunately, returning
1540        * undefined values isn't one of them so we return an actual zero.
1541        */
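      /* Illustrative shape of what is emitted below, e.g. for a 16-byte load:
       *
       *    if (addr_is_in_bounds(addr, 16)) {
       *       val = load(...);
       *    }
       *    result = phi(val, 0);
       */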
1542       nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
1543 
1544       /* TODO: Better handle block_intel. */
1545       const unsigned load_size = (bit_size / 8) * load->num_components;
1546       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
1547 
1548       nir_builder_instr_insert(b, &load->instr);
1549 
1550       nir_pop_if(b, NULL);
1551 
1552       result = nir_if_phi(b, &load->dest.ssa, zero);
1553    } else {
1554       nir_builder_instr_insert(b, &load->instr);
1555       result = &load->dest.ssa;
1556    }
1557 
1558    if (intrin->dest.ssa.bit_size == 1) {
1559       /* For shared, we can go ahead and use NIR's and/or the back-end's
1560        * standard encoding for booleans rather than forcing a 0/1 boolean.
1561        * This should save an instruction or two.
1562        */
1563       if (mode == nir_var_mem_shared ||
1564           mode == nir_var_shader_temp ||
1565           mode == nir_var_function_temp)
1566          result = nir_b2b1(b, result);
1567       else
1568          result = nir_i2b(b, result);
1569    }
1570 
1571    return result;
1572 }
1573 
1574 static void
1575 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
1576                         nir_ssa_def *addr, nir_address_format addr_format,
1577                         nir_variable_mode modes,
1578                         uint32_t align_mul, uint32_t align_offset,
1579                         nir_ssa_def *value, nir_component_mask_t write_mask)
1580 {
1581    modes = canonicalize_generic_modes(modes);
1582 
1583    if (util_bitcount(modes) > 1) {
1584       if (addr_format_is_global(addr_format, modes)) {
1585          build_explicit_io_store(b, intrin, addr, addr_format,
1586                                  nir_var_mem_global,
1587                                  align_mul, align_offset,
1588                                  value, write_mask);
1589       } else if (modes & nir_var_function_temp) {
1590          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1591                                                       nir_var_function_temp));
1592          build_explicit_io_store(b, intrin, addr, addr_format,
1593                                  nir_var_function_temp,
1594                                  align_mul, align_offset,
1595                                  value, write_mask);
1596          nir_push_else(b, NULL);
1597          build_explicit_io_store(b, intrin, addr, addr_format,
1598                                  modes & ~nir_var_function_temp,
1599                                  align_mul, align_offset,
1600                                  value, write_mask);
1601          nir_pop_if(b, NULL);
1602       } else {
1603          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1604                                                       nir_var_mem_shared));
1605          assert(modes & nir_var_mem_shared);
1606          build_explicit_io_store(b, intrin, addr, addr_format,
1607                                  nir_var_mem_shared,
1608                                  align_mul, align_offset,
1609                                  value, write_mask);
1610          nir_push_else(b, NULL);
1611          assert(modes & nir_var_mem_global);
1612          build_explicit_io_store(b, intrin, addr, addr_format,
1613                                  nir_var_mem_global,
1614                                  align_mul, align_offset,
1615                                  value, write_mask);
1616          nir_pop_if(b, NULL);
1617       }
1618       return;
1619    }
1620 
1621    assert(util_bitcount(modes) == 1);
1622    const nir_variable_mode mode = modes;
1623 
1624    nir_intrinsic_op op;
1625    switch (intrin->intrinsic) {
1626    case nir_intrinsic_store_deref:
1627       assert(write_mask != 0);
1628 
1629       switch (mode) {
1630       case nir_var_mem_ssbo:
1631          if (addr_format_is_global(addr_format, mode))
1632             op = get_store_global_op_from_addr_format(addr_format);
1633          else
1634             op = nir_intrinsic_store_ssbo;
1635          break;
1636       case nir_var_mem_global:
1637          assert(addr_format_is_global(addr_format, mode));
1638          op = get_store_global_op_from_addr_format(addr_format);
1639          break;
1640       case nir_var_mem_shared:
1641          assert(addr_format_is_offset(addr_format, mode));
1642          op = nir_intrinsic_store_shared;
1643          break;
1644       case nir_var_mem_task_payload:
1645          assert(addr_format_is_offset(addr_format, mode));
1646          op = nir_intrinsic_store_task_payload;
1647          break;
1648       case nir_var_shader_temp:
1649       case nir_var_function_temp:
1650          if (addr_format_is_offset(addr_format, mode)) {
1651             op = nir_intrinsic_store_scratch;
1652          } else {
1653             assert(addr_format_is_global(addr_format, mode));
1654             op = get_store_global_op_from_addr_format(addr_format);
1655          }
1656          break;
1657       default:
1658          unreachable("Unsupported explicit IO variable mode");
1659       }
1660       break;
1661 
1662    case nir_intrinsic_store_deref_block_intel:
1663       assert(write_mask == 0);
1664 
1665       switch (mode) {
1666       case nir_var_mem_ssbo:
1667          if (addr_format_is_global(addr_format, mode))
1668             op = nir_intrinsic_store_global_block_intel;
1669          else
1670             op = nir_intrinsic_store_ssbo_block_intel;
1671          break;
1672       case nir_var_mem_global:
1673          op = nir_intrinsic_store_global_block_intel;
1674          break;
1675       case nir_var_mem_shared:
1676          op = nir_intrinsic_store_shared_block_intel;
1677          break;
1678       default:
1679          unreachable("Unsupported explicit IO variable mode");
1680       }
1681       break;
1682 
1683    default:
1684       unreachable("Invalid intrinsic");
1685    }
1686 
1687    nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
1688 
1689    if (value->bit_size == 1) {
1690       /* For shared, we can go ahead and use NIR's and/or the back-end's
1691        * standard encoding for booleans rather than forcing a 0/1 boolean.
1692        * This should save an instruction or two.
1693        *
1694        * TODO: Make the native bool bit_size an option.
1695        */
1696       if (mode == nir_var_mem_shared ||
1697           mode == nir_var_shader_temp ||
1698           mode == nir_var_function_temp)
1699          value = nir_b2b32(b, value);
1700       else
1701          value = nir_b2i(b, value, 32);
1702    }
1703 
1704    store->src[0] = nir_src_for_ssa(value);
1705    if (addr_format_is_global(addr_format, mode)) {
1706       store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1707    } else if (addr_format_is_offset(addr_format, mode)) {
1708       assert(addr->num_components == 1);
1709       store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1710    } else {
1711       store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1712       store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1713    }
1714 
1715    nir_intrinsic_set_write_mask(store, write_mask);
1716 
1717    if (nir_intrinsic_has_access(store))
1718       nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1719 
1720    nir_intrinsic_set_align(store, align_mul, align_offset);
1721 
1722    assert(value->num_components == 1 ||
1723           value->num_components == intrin->num_components);
1724    store->num_components = value->num_components;
1725 
1726    assert(value->bit_size % 8 == 0);
1727 
1728    if (addr_format_needs_bounds_check(addr_format)) {
1729       /* TODO: Better handle block_intel. */
1730       const unsigned store_size = (value->bit_size / 8) * store->num_components;
1731       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1732 
1733       nir_builder_instr_insert(b, &store->instr);
1734 
1735       nir_pop_if(b, NULL);
1736    } else {
1737       nir_builder_instr_insert(b, &store->instr);
1738    }
1739 }
1740 
1741 static nir_ssa_def *
1742 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1743                          nir_ssa_def *addr, nir_address_format addr_format,
1744                          nir_variable_mode modes)
1745 {
1746    modes = canonicalize_generic_modes(modes);
1747 
1748    if (util_bitcount(modes) > 1) {
1749       if (addr_format_is_global(addr_format, modes)) {
1750          return build_explicit_io_atomic(b, intrin, addr, addr_format,
1751                                          nir_var_mem_global);
1752       } else if (modes & nir_var_function_temp) {
1753          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1754                                                       nir_var_function_temp));
1755          nir_ssa_def *res1 =
1756             build_explicit_io_atomic(b, intrin, addr, addr_format,
1757                                      nir_var_function_temp);
1758          nir_push_else(b, NULL);
1759          nir_ssa_def *res2 =
1760             build_explicit_io_atomic(b, intrin, addr, addr_format,
1761                                      modes & ~nir_var_function_temp);
1762          nir_pop_if(b, NULL);
1763          return nir_if_phi(b, res1, res2);
1764       } else {
1765          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1766                                                       nir_var_mem_shared));
1767          assert(modes & nir_var_mem_shared);
1768          nir_ssa_def *res1 =
1769             build_explicit_io_atomic(b, intrin, addr, addr_format,
1770                                      nir_var_mem_shared);
1771          nir_push_else(b, NULL);
1772          assert(modes & nir_var_mem_global);
1773          nir_ssa_def *res2 =
1774             build_explicit_io_atomic(b, intrin, addr, addr_format,
1775                                      nir_var_mem_global);
1776          nir_pop_if(b, NULL);
1777          return nir_if_phi(b, res1, res2);
1778       }
1779    }
1780 
1781    assert(util_bitcount(modes) == 1);
1782    const nir_variable_mode mode = modes;
1783 
1784    const unsigned num_data_srcs =
1785       nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1786 
1787    nir_intrinsic_op op;
1788    switch (mode) {
1789    case nir_var_mem_ssbo:
1790       if (addr_format_is_global(addr_format, mode))
1791          op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1792       else
1793          op = ssbo_atomic_for_deref(intrin->intrinsic);
1794       break;
1795    case nir_var_mem_global:
1796       assert(addr_format_is_global(addr_format, mode));
1797       op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1798       break;
1799    case nir_var_mem_shared:
1800       assert(addr_format_is_offset(addr_format, mode));
1801       op = shared_atomic_for_deref(intrin->intrinsic);
1802       break;
1803    case nir_var_mem_task_payload:
1804       assert(addr_format_is_offset(addr_format, mode));
1805       op = task_payload_atomic_for_deref(intrin->intrinsic);
1806       break;
1807    default:
1808       unreachable("Unsupported explicit IO variable mode");
1809    }
1810 
1811    nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1812 
1813    unsigned src = 0;
1814    if (addr_format_is_global(addr_format, mode)) {
1815       atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1816    } else if (addr_format_is_offset(addr_format, mode)) {
1817       assert(addr->num_components == 1);
1818       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1819    } else {
1820       atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1821       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1822    }
1823    for (unsigned i = 0; i < num_data_srcs; i++) {
1824       atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1825    }
1826 
1827    /* Global atomics don't have access flags because they assume that the
1828     * address may be non-uniform.
1829     */
1830    if (nir_intrinsic_has_access(atomic))
1831       nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1832 
1833    assert(intrin->dest.ssa.num_components == 1);
1834    nir_ssa_dest_init(&atomic->instr, &atomic->dest,
1835                      1, intrin->dest.ssa.bit_size, NULL);
1836 
1837    assert(atomic->dest.ssa.bit_size % 8 == 0);
1838 
1839    if (addr_format_needs_bounds_check(addr_format)) {
1840       const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
1841       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1842 
1843       nir_builder_instr_insert(b, &atomic->instr);
1844 
1845       nir_pop_if(b, NULL);
1846       return nir_if_phi(b, &atomic->dest.ssa,
1847                            nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
1848    } else {
1849       nir_builder_instr_insert(b, &atomic->instr);
1850       return &atomic->dest.ssa;
1851    }
1852 }
1853 
1854 nir_ssa_def *
1855 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1856                                    nir_ssa_def *base_addr,
1857                                    nir_address_format addr_format)
1858 {
1859    assert(deref->dest.is_ssa);
1860    switch (deref->deref_type) {
1861    case nir_deref_type_var:
1862       return build_addr_for_var(b, deref->var, addr_format);
1863 
1864    case nir_deref_type_ptr_as_array:
1865    case nir_deref_type_array: {
1866       unsigned stride = nir_deref_instr_array_stride(deref);
1867       assert(stride > 0);
1868 
1869       unsigned offset_bit_size = addr_get_offset_bit_size(base_addr, addr_format);
1870       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1871       nir_ssa_def *offset;
1872 
1873       /* If the access chain has been declared in-bounds, then we know it doesn't
1874        * overflow the type.  For nir_deref_type_array, this implies it cannot be
1875        * negative. Also, since types in NIR have a maximum 32-bit size, we know the
1876        * final result will fit in a 32-bit value so we can convert the index to
1877        * 32-bit before multiplying and save ourselves from a 64-bit multiply.
1878        */
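      /* For instance (illustrative numbers): with a 64-bit address format,
       * stride == 16 and an in-bounds index i, the offset below is computed
       * as u2u64(i * 16) using a 32-bit multiply; an index that is not known
       * to be in bounds is instead sign-extended to 64 bits before the
       * multiply.
       */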
1879       if (deref->arr.in_bounds && deref->deref_type == nir_deref_type_array) {
1880          index = nir_u2u32(b, index);
1881          offset = nir_u2u(b, nir_amul_imm(b, index, stride), offset_bit_size);
1882       } else {
1883          index = nir_i2i(b, index, offset_bit_size);
1884          offset = nir_amul_imm(b, index, stride);
1885       }
1886 
1887       return build_addr_iadd(b, base_addr, addr_format, deref->modes, offset);
1888    }
1889 
1890    case nir_deref_type_array_wildcard:
1891       unreachable("Wildcards should be lowered by now");
1892       break;
1893 
1894    case nir_deref_type_struct: {
1895       nir_deref_instr *parent = nir_deref_instr_parent(deref);
1896       int offset = glsl_get_struct_field_offset(parent->type,
1897                                                 deref->strct.index);
1898       assert(offset >= 0);
1899       return build_addr_iadd_imm(b, base_addr, addr_format,
1900                                  deref->modes, offset);
1901    }
1902 
1903    case nir_deref_type_cast:
1904       /* Nothing to do here */
1905       return base_addr;
1906    }
1907 
1908    unreachable("Invalid NIR deref type");
1909 }
1910 
1911 void
1912 nir_lower_explicit_io_instr(nir_builder *b,
1913                             nir_intrinsic_instr *intrin,
1914                             nir_ssa_def *addr,
1915                             nir_address_format addr_format)
1916 {
1917    b->cursor = nir_after_instr(&intrin->instr);
1918 
1919    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1920    unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1921    unsigned scalar_size = type_scalar_size_bytes(deref->type);
1922    assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
1923    assert(vec_stride == 0 || vec_stride >= scalar_size);
1924 
1925    uint32_t align_mul, align_offset;
1926    if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
1927       /* If we don't have an alignment from the deref, assume scalar */
1928       align_mul = scalar_size;
1929       align_offset = 0;
1930    }
1931 
1932    switch (intrin->intrinsic) {
1933    case nir_intrinsic_load_deref: {
1934       nir_ssa_def *value;
1935       if (vec_stride > scalar_size) {
1936          nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, };
1937          for (unsigned i = 0; i < intrin->num_components; i++) {
1938             unsigned comp_offset = i * vec_stride;
1939             nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1940                                                          deref->modes,
1941                                                          comp_offset);
1942             comps[i] = build_explicit_io_load(b, intrin, comp_addr,
1943                                               addr_format, deref->modes,
1944                                               align_mul,
1945                                               (align_offset + comp_offset) %
1946                                                  align_mul,
1947                                               1);
1948          }
1949          value = nir_vec(b, comps, intrin->num_components);
1950       } else {
1951          value = build_explicit_io_load(b, intrin, addr, addr_format,
1952                                         deref->modes, align_mul, align_offset,
1953                                         intrin->num_components);
1954       }
1955       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1956       break;
1957    }
1958 
1959    case nir_intrinsic_store_deref: {
1960       assert(intrin->src[1].is_ssa);
1961       nir_ssa_def *value = intrin->src[1].ssa;
1962       nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
1963       if (vec_stride > scalar_size) {
1964          for (unsigned i = 0; i < intrin->num_components; i++) {
1965             if (!(write_mask & (1 << i)))
1966                continue;
1967 
1968             unsigned comp_offset = i * vec_stride;
1969             nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1970                                                          deref->modes,
1971                                                          comp_offset);
1972             build_explicit_io_store(b, intrin, comp_addr, addr_format,
1973                                     deref->modes, align_mul,
1974                                     (align_offset + comp_offset) % align_mul,
1975                                     nir_channel(b, value, i), 1);
1976          }
1977       } else {
1978          build_explicit_io_store(b, intrin, addr, addr_format,
1979                                  deref->modes, align_mul, align_offset,
1980                                  value, write_mask);
1981       }
1982       break;
1983    }
1984 
1985    case nir_intrinsic_load_deref_block_intel: {
1986       nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
1987                                                   deref->modes,
1988                                                   align_mul, align_offset,
1989                                                   intrin->num_components);
1990       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1991       break;
1992    }
1993 
1994    case nir_intrinsic_store_deref_block_intel: {
1995       assert(intrin->src[1].is_ssa);
1996       nir_ssa_def *value = intrin->src[1].ssa;
1997       const nir_component_mask_t write_mask = 0;
1998       build_explicit_io_store(b, intrin, addr, addr_format,
1999                               deref->modes, align_mul, align_offset,
2000                               value, write_mask);
2001       break;
2002    }
2003 
2004    default: {
2005       nir_ssa_def *value =
2006          build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
2007       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
2008       break;
2009    }
2010    }
2011 
2012    nir_instr_remove(&intrin->instr);
2013 }
2014 
2015 bool
2016 nir_get_explicit_deref_align(nir_deref_instr *deref,
2017                              bool default_to_type_align,
2018                              uint32_t *align_mul,
2019                              uint32_t *align_offset)
2020 {
2021    if (deref->deref_type == nir_deref_type_var) {
2022       /* If we see a variable, align_mul is effectively infinite because we
2023        * know the offset exactly (up to the offset of the base pointer for the
2024        * given variable mode).  We have to pick something so we choose 256B
2025        * as an arbitrary alignment which seems high enough for any reasonable
2026        * wide-load use-case.  Back-ends should clamp alignments down if 256B
2027        * is too large for some reason.
2028        */
2029       *align_mul = 256;
2030       *align_offset = deref->var->data.driver_location % 256;
2031       return true;
2032    }
2033 
2034    /* If we're a cast deref that has an alignment, use that. */
2035    if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
2036       *align_mul = deref->cast.align_mul;
2037       *align_offset = deref->cast.align_offset;
2038       return true;
2039    }
2040 
2041    /* Otherwise, we need to compute the alignment based on the parent */
2042    nir_deref_instr *parent = nir_deref_instr_parent(deref);
2043    if (parent == NULL) {
2044       assert(deref->deref_type == nir_deref_type_cast);
2045       if (default_to_type_align) {
2046          /* If we don't have a parent, assume the type's alignment, if any. */
2047          unsigned type_align = glsl_get_explicit_alignment(deref->type);
2048          if (type_align == 0)
2049             return false;
2050 
2051          *align_mul = type_align;
2052          *align_offset = 0;
2053          return true;
2054       } else {
2055          return false;
2056       }
2057    }
2058 
2059    uint32_t parent_mul, parent_offset;
2060    if (!nir_get_explicit_deref_align(parent, default_to_type_align,
2061                                      &parent_mul, &parent_offset))
2062       return false;
2063 
2064    switch (deref->deref_type) {
2065    case nir_deref_type_var:
2066       unreachable("Handled above");
2067 
2068    case nir_deref_type_array:
2069    case nir_deref_type_array_wildcard:
2070    case nir_deref_type_ptr_as_array: {
2071       const unsigned stride = nir_deref_instr_array_stride(deref);
2072       if (stride == 0)
2073          return false;
2074 
2075       if (deref->deref_type != nir_deref_type_array_wildcard &&
2076           nir_src_is_const(deref->arr.index)) {
2077          unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
2078          *align_mul = parent_mul;
2079          *align_offset = (parent_offset + offset) % parent_mul;
2080       } else {
2081          /* If this is a wildcard or an indirect deref, we have to go with the
2082           * power-of-two gcd.
2083           */
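         /* e.g. stride == 12 and parent_mul == 16 give
          * 1 << (ffs(12) - 1) == 4, so align_mul becomes MIN2(16, 4) == 4.
          */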
2084          *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
2085          *align_offset = parent_offset % *align_mul;
2086       }
2087       return true;
2088    }
2089 
2090    case nir_deref_type_struct: {
2091       const int offset = glsl_get_struct_field_offset(parent->type,
2092                                                       deref->strct.index);
2093       if (offset < 0)
2094          return false;
2095 
2096       *align_mul = parent_mul;
2097       *align_offset = (parent_offset + offset) % parent_mul;
2098       return true;
2099    }
2100 
2101    case nir_deref_type_cast:
2102       /* We handled the explicit alignment case above. */
2103       assert(deref->cast.align_mul == 0);
2104       *align_mul = parent_mul;
2105       *align_offset = parent_offset;
2106       return true;
2107    }
2108 
2109    unreachable("Invalid deref_instr_type");
2110 }
2111 
2112 static void
2113 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
2114                         nir_address_format addr_format)
2115 {
2116    /* Just delete the deref if it's not used.  We can't use
2117     * nir_deref_instr_remove_if_unused here because it may remove more than
2118     * one deref which could break our list walking since we walk the list
2119     * backwards.
2120     */
2121    assert(list_is_empty(&deref->dest.ssa.if_uses));
2122    if (list_is_empty(&deref->dest.ssa.uses)) {
2123       nir_instr_remove(&deref->instr);
2124       return;
2125    }
2126 
2127    b->cursor = nir_after_instr(&deref->instr);
2128 
2129    nir_ssa_def *base_addr = NULL;
2130    if (deref->deref_type != nir_deref_type_var) {
2131       assert(deref->parent.is_ssa);
2132       base_addr = deref->parent.ssa;
2133    }
2134 
2135    nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
2136                                                           addr_format);
2137    assert(addr->bit_size == deref->dest.ssa.bit_size);
2138    assert(addr->num_components == deref->dest.ssa.num_components);
2139 
2140    nir_instr_remove(&deref->instr);
2141    nir_ssa_def_rewrite_uses(&deref->dest.ssa, addr);
2142 }
2143 
2144 static void
2145 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
2146                          nir_address_format addr_format)
2147 {
2148    assert(intrin->src[0].is_ssa);
2149    nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
2150 }
2151 
2152 static void
2153 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
2154                                nir_address_format addr_format)
2155 {
2156    b->cursor = nir_after_instr(&intrin->instr);
2157 
2158    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2159 
2160    assert(glsl_type_is_array(deref->type));
2161    assert(glsl_get_length(deref->type) == 0);
2162    assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
2163    unsigned stride = glsl_get_explicit_stride(deref->type);
2164    assert(stride > 0);
2165 
2166    nir_ssa_def *addr = &deref->dest.ssa;
2167    nir_ssa_def *index = addr_to_index(b, addr, addr_format);
2168    nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
2169    unsigned access = nir_intrinsic_access(intrin);
2170 
2171    nir_ssa_def *arr_size = nir_get_ssbo_size(b, index, .access=access);
2172    arr_size = nir_usub_sat(b, arr_size, offset);
2173    arr_size = nir_udiv_imm(b, arr_size, stride);
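   /* e.g. (illustrative numbers): a 256-byte SSBO with the unsized array at
    * offset 16 and a 12-byte stride yields usub_sat(256, 16) / 12 == 20
    * elements.
    */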
2174 
2175    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, arr_size);
2176    nir_instr_remove(&intrin->instr);
2177 }
2178 
2179 static void
2180 lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
2181                              nir_address_format addr_format)
2182 {
2183    if (addr_format_is_global(addr_format, 0)) {
2184       /* If the address format is always global, then the driver can use
2185        * global addresses regardless of the mode.  In that case, don't create
2186        * a check, just whack the intrinsic to addr_mode_is and delegate to the
2187        * driver lowering.
2188        */
2189       intrin->intrinsic = nir_intrinsic_addr_mode_is;
2190       return;
2191    }
2192 
2193    assert(intrin->src[0].is_ssa);
2194    nir_ssa_def *addr = intrin->src[0].ssa;
2195 
2196    b->cursor = nir_instr_remove(&intrin->instr);
2197 
2198    nir_ssa_def *is_mode =
2199       build_runtime_addr_mode_check(b, addr, addr_format,
2200                                     nir_intrinsic_memory_modes(intrin));
2201 
2202    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, is_mode);
2203 }
2204 
2205 static bool
2206 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
2207                            nir_address_format addr_format)
2208 {
2209    bool progress = false;
2210 
2211    nir_builder b;
2212    nir_builder_init(&b, impl);
2213 
2214    /* Walk in reverse order so that we can see the full deref chain when we
2215     * lower the access operations.  We lower them assuming that the derefs
2216     * will be turned into address calculations later.
2217     */
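   /* Concretely (illustrative): for a chain
    *
    *    %a = deref_var &ssbo_var
    *    %b = deref_array %a, %i
    *    load_deref %b
    *
    * the load is lowered first, while the not-yet-lowered derefs are still
    * available for type, mode and range information; %b then becomes an
    * address add and %a becomes the base address for the variable.
    */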
2218    nir_foreach_block_reverse(block, impl) {
2219       nir_foreach_instr_reverse_safe(instr, block) {
2220          switch (instr->type) {
2221          case nir_instr_type_deref: {
2222             nir_deref_instr *deref = nir_instr_as_deref(instr);
2223             if (nir_deref_mode_is_in_set(deref, modes)) {
2224                lower_explicit_io_deref(&b, deref, addr_format);
2225                progress = true;
2226             }
2227             break;
2228          }
2229 
2230          case nir_instr_type_intrinsic: {
2231             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2232             switch (intrin->intrinsic) {
2233             case nir_intrinsic_load_deref:
2234             case nir_intrinsic_store_deref:
2235             case nir_intrinsic_load_deref_block_intel:
2236             case nir_intrinsic_store_deref_block_intel:
2237             case nir_intrinsic_deref_atomic_add:
2238             case nir_intrinsic_deref_atomic_imin:
2239             case nir_intrinsic_deref_atomic_umin:
2240             case nir_intrinsic_deref_atomic_imax:
2241             case nir_intrinsic_deref_atomic_umax:
2242             case nir_intrinsic_deref_atomic_and:
2243             case nir_intrinsic_deref_atomic_or:
2244             case nir_intrinsic_deref_atomic_xor:
2245             case nir_intrinsic_deref_atomic_exchange:
2246             case nir_intrinsic_deref_atomic_comp_swap:
2247             case nir_intrinsic_deref_atomic_fadd:
2248             case nir_intrinsic_deref_atomic_fmin:
2249             case nir_intrinsic_deref_atomic_fmax:
2250             case nir_intrinsic_deref_atomic_fcomp_swap: {
2251                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2252                if (nir_deref_mode_is_in_set(deref, modes)) {
2253                   lower_explicit_io_access(&b, intrin, addr_format);
2254                   progress = true;
2255                }
2256                break;
2257             }
2258 
2259             case nir_intrinsic_deref_buffer_array_length: {
2260                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2261                if (nir_deref_mode_is_in_set(deref, modes)) {
2262                   lower_explicit_io_array_length(&b, intrin, addr_format);
2263                   progress = true;
2264                }
2265                break;
2266             }
2267 
2268             case nir_intrinsic_deref_mode_is: {
2269                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2270                if (nir_deref_mode_is_in_set(deref, modes)) {
2271                   lower_explicit_io_mode_check(&b, intrin, addr_format);
2272                   progress = true;
2273                }
2274                break;
2275             }
2276 
2277             default:
2278                break;
2279             }
2280             break;
2281          }
2282 
2283          default:
2284             /* Nothing to do */
2285             break;
2286          }
2287       }
2288    }
2289 
2290    if (progress) {
2291       nir_metadata_preserve(impl, nir_metadata_block_index |
2292                                   nir_metadata_dominance);
2293    } else {
2294       nir_metadata_preserve(impl, nir_metadata_all);
2295    }
2296 
2297    return progress;
2298 }
2299 
2300 /** Lower explicitly laid out I/O access to byte offset/address intrinsics
2301  *
2302  * This pass is intended to be used for any I/O which touches memory external
2303  * to the shader or which is directly visible to the client.  It requires that
2304  * all data types in the given modes have a explicit stride/offset decorations
2305  * to tell it exactly how to calculate the offset/address for the given load,
2306  * store, or atomic operation.  If the offset/stride information does not come
2307  * from the client explicitly (as with shared variables in GL or Vulkan),
2308  * nir_lower_vars_to_explicit_types() can be used to add them.
2309  *
2310  * Unlike nir_lower_io, this pass is fully capable of handling incomplete
2311  * pointer chains which may contain cast derefs.  It does so by walking the
2312  * deref chain backwards and simply replacing each deref, one at a time, with
2313  * the appropriate address calculation.  The pass takes a nir_address_format
2314  * parameter which describes how the offset or address is to be represented
2315  * during calculations.  By ensuring that the address is always in a
2316  * consistent format, pointers can safely be conjured from thin air by the
2317  * driver, stored to variables, passed through phis, etc.
2318  *
2319  * The one exception to the simple algorithm described above is for handling
2320  * row-major matrices in which case we may look down one additional level of
2321  * the deref chain.
2322  *
2323  * This pass is also capable of handling OpenCL generic pointers.  If the
2324  * address mode is global, it will lower any ambiguous (more than one mode)
2325  * access to global and pass through the deref_mode_is run-time checks as
2326  * addr_mode_is.  This assumes the driver has somehow mapped shared and
2327  * scratch memory to the global address space.  For other modes such as
2328  * 62bit_generic, there is an enum embedded in the address and we lower
2329  * ambiguous access to an if-ladder and deref_mode_is to a check against the
2330  * embedded enum.  If nir_lower_explicit_io is called on any shader that
2331  * contains generic pointers, it must either be used on all of the generic
2332  * modes or none.
2333  */
2334 bool
2335 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
2336                       nir_address_format addr_format)
2337 {
2338    bool progress = false;
2339 
2340    nir_foreach_function(function, shader) {
2341       if (function->impl &&
2342           nir_lower_explicit_io_impl(function->impl, modes, addr_format))
2343          progress = true;
2344    }
2345 
2346    return progress;
2347 }
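/* A minimal usage sketch (illustrative; which address formats and modes a
 * driver picks is its own choice, not prescribed by this pass):
 *
 *    nir_lower_explicit_io(nir, nir_var_mem_ubo | nir_var_mem_ssbo,
 *                          nir_address_format_32bit_index_offset);
 *    nir_lower_explicit_io(nir, nir_var_mem_global,
 *                          nir_address_format_64bit_global);
 */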
2348 
2349 static bool
2350 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
2351                                       nir_variable_mode modes,
2352                                       glsl_type_size_align_func type_info)
2353 {
2354    bool progress = false;
2355 
2356    nir_foreach_block(block, impl) {
2357       nir_foreach_instr(instr, block) {
2358          if (instr->type != nir_instr_type_deref)
2359             continue;
2360 
2361          nir_deref_instr *deref = nir_instr_as_deref(instr);
2362          if (!nir_deref_mode_is_in_set(deref, modes))
2363             continue;
2364 
2365          unsigned size, alignment;
2366          const struct glsl_type *new_type =
2367             glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
2368          if (new_type != deref->type) {
2369             progress = true;
2370             deref->type = new_type;
2371          }
2372          if (deref->deref_type == nir_deref_type_cast) {
2373             /* See also glsl_type::get_explicit_type_for_size_align() */
2374             unsigned new_stride = align(size, alignment);
2375             if (new_stride != deref->cast.ptr_stride) {
2376                deref->cast.ptr_stride = new_stride;
2377                progress = true;
2378             }
2379          }
2380       }
2381    }
2382 
2383    if (progress) {
2384       nir_metadata_preserve(impl, nir_metadata_block_index |
2385                                   nir_metadata_dominance |
2386                                   nir_metadata_live_ssa_defs |
2387                                   nir_metadata_loop_analysis);
2388    } else {
2389       nir_metadata_preserve(impl, nir_metadata_all);
2390    }
2391 
2392    return progress;
2393 }
2394 
2395 static bool
2396 lower_vars_to_explicit(nir_shader *shader,
2397                        struct exec_list *vars, nir_variable_mode mode,
2398                        glsl_type_size_align_func type_info)
2399 {
2400    bool progress = false;
2401    unsigned offset;
2402    switch (mode) {
2403    case nir_var_uniform:
2404       assert(shader->info.stage == MESA_SHADER_KERNEL);
2405       offset = 0;
2406       break;
2407    case nir_var_function_temp:
2408    case nir_var_shader_temp:
2409       offset = shader->scratch_size;
2410       break;
2411    case nir_var_mem_shared:
2412       offset = shader->info.shared_size;
2413       break;
2414    case nir_var_mem_task_payload:
2415       offset = shader->info.task_payload_size;
2416       break;
2417    case nir_var_mem_global:
2418       offset = shader->global_mem_size;
2419       break;
2420    case nir_var_mem_constant:
2421       offset = shader->constant_data_size;
2422       break;
2423    case nir_var_shader_call_data:
2424    case nir_var_ray_hit_attrib:
2425       offset = 0;
2426       break;
2427    default:
2428       unreachable("Unsupported mode");
2429    }
2430    nir_foreach_variable_in_list(var, vars) {
2431       if (var->data.mode != mode)
2432          continue;
2433 
2434       unsigned size, align;
2435       const struct glsl_type *explicit_type =
2436          glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);
2437 
2438       if (explicit_type != var->type)
2439          var->type = explicit_type;
2440 
2441       UNUSED bool is_empty_struct =
2442          glsl_type_is_struct_or_ifc(explicit_type) &&
2443          glsl_get_length(explicit_type) == 0;
2444 
2445       assert(util_is_power_of_two_nonzero(align) || is_empty_struct);
2446       var->data.driver_location = ALIGN_POT(offset, align);
2447       offset = var->data.driver_location + size;
2448       progress = true;
2449    }
2450 
2451    switch (mode) {
2452    case nir_var_uniform:
2453       assert(shader->info.stage == MESA_SHADER_KERNEL);
2454       shader->num_uniforms = offset;
2455       break;
2456    case nir_var_shader_temp:
2457    case nir_var_function_temp:
2458       shader->scratch_size = offset;
2459       break;
2460    case nir_var_mem_shared:
2461       shader->info.shared_size = offset;
2462       break;
2463    case nir_var_mem_task_payload:
2464       shader->info.task_payload_size = offset;
2465       break;
2466    case nir_var_mem_global:
2467       shader->global_mem_size = offset;
2468       break;
2469    case nir_var_mem_constant:
2470       shader->constant_data_size = offset;
2471       break;
2472    case nir_var_shader_call_data:
2473    case nir_var_ray_hit_attrib:
2474       break;
2475    default:
2476       unreachable("Unsupported mode");
2477    }
2478 
2479    return progress;
2480 }
2481 
2482 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
2483  * generic pointers, it must either be used on all of the generic modes or
2484  * none.
2485  */
2486 bool
2487 nir_lower_vars_to_explicit_types(nir_shader *shader,
2488                                  nir_variable_mode modes,
2489                                  glsl_type_size_align_func type_info)
2490 {
2491    /* TODO: Situations which need to be handled to support more modes:
2492     * - row-major matrices
2493     * - compact shader inputs/outputs
2494     * - interface types
2495     */
2496    ASSERTED nir_variable_mode supported =
2497       nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
2498       nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
2499       nir_var_shader_call_data | nir_var_ray_hit_attrib |
2500       nir_var_mem_task_payload;
2501    assert(!(modes & ~supported) && "unsupported");
2502 
2503    bool progress = false;
2504 
2505    if (modes & nir_var_uniform)
2506       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
2507    if (modes & nir_var_mem_global)
2508       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);
2509 
2510    if (modes & nir_var_mem_shared) {
2511       assert(!shader->info.shared_memory_explicit_layout);
2512       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
2513    }
2514 
2515    if (modes & nir_var_shader_temp)
2516       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
2517    if (modes & nir_var_mem_constant)
2518       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
2519    if (modes & nir_var_shader_call_data)
2520       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
2521    if (modes & nir_var_ray_hit_attrib)
2522       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);
2523    if (modes & nir_var_mem_task_payload)
2524       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_task_payload, type_info);
2525 
2526    nir_foreach_function(function, shader) {
2527       if (function->impl) {
2528          if (modes & nir_var_function_temp)
2529             progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info);
2530 
2531          progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info);
2532       }
2533    }
2534 
2535    return progress;
2536 }
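/* Typical pairing with the explicit I/O lowering above (a sketch;
 * glsl_get_natural_size_align_bytes is one size/align callback drivers
 * commonly pass, but any glsl_type_size_align_func works):
 *
 *    nir_lower_vars_to_explicit_types(nir, nir_var_function_temp,
 *                                     glsl_get_natural_size_align_bytes);
 *    nir_lower_explicit_io(nir, nir_var_function_temp,
 *                          nir_address_format_32bit_offset);
 */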
2537 
2538 static void
2539 write_constant(void *dst, size_t dst_size,
2540                const nir_constant *c, const struct glsl_type *type)
2541 {
2542    if (glsl_type_is_vector_or_scalar(type)) {
2543       const unsigned num_components = glsl_get_vector_elements(type);
2544       const unsigned bit_size = glsl_get_bit_size(type);
2545       if (bit_size == 1) {
2546          /* Booleans are special-cased to be 32-bit
2547           *
2548           * TODO: Make the native bool bit_size an option.
2549           */
2550          assert(num_components * 4 <= dst_size);
2551          for (unsigned i = 0; i < num_components; i++) {
2552             int32_t b32 = -(int)c->values[i].b;
2553             memcpy((char *)dst + i * 4, &b32, 4);
2554          }
2555       } else {
2556          assert(bit_size >= 8 && bit_size % 8 == 0);
2557          const unsigned byte_size = bit_size / 8;
2558          assert(num_components * byte_size <= dst_size);
2559          for (unsigned i = 0; i < num_components; i++) {
2560             /* Annoyingly, thanks to packed structs, we can't make any
2561              * assumptions about the alignment of dst.  To avoid any strange
2562              * issues with unaligned writes, we always use memcpy.
2563              */
2564             memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
2565          }
2566       }
2567    } else if (glsl_type_is_array_or_matrix(type)) {
2568       const unsigned array_len = glsl_get_length(type);
2569       const unsigned stride = glsl_get_explicit_stride(type);
2570       assert(stride > 0);
2571       const struct glsl_type *elem_type = glsl_get_array_element(type);
2572       for (unsigned i = 0; i < array_len; i++) {
2573          unsigned elem_offset = i * stride;
2574          assert(elem_offset < dst_size);
2575          write_constant((char *)dst + elem_offset, dst_size - elem_offset,
2576                         c->elements[i], elem_type);
2577       }
2578    } else {
2579       assert(glsl_type_is_struct_or_ifc(type));
2580       const unsigned num_fields = glsl_get_length(type);
2581       for (unsigned i = 0; i < num_fields; i++) {
2582          const int field_offset = glsl_get_struct_field_offset(type, i);
2583          assert(field_offset >= 0 && field_offset < dst_size);
2584          const struct glsl_type *field_type = glsl_get_struct_field(type, i);
2585          write_constant((char *)dst + field_offset, dst_size - field_offset,
2586                         c->elements[i], field_type);
2587       }
2588    }
2589 }
2590 
2591 void
2592 nir_gather_explicit_io_initializers(nir_shader *shader,
2593                                     void *dst, size_t dst_size,
2594                                     nir_variable_mode mode)
2595 {
2596    /* It doesn't really make sense to gather initializers for more than one
2597     * mode at a time.  If this ever becomes well-defined, we can drop the
2598     * assert then.
2599     */
2600    assert(util_bitcount(mode) == 1);
2601 
2602    nir_foreach_variable_with_modes(var, shader, mode) {
2603       assert(var->data.driver_location < dst_size);
2604       write_constant((char *)dst + var->data.driver_location,
2605                      dst_size - var->data.driver_location,
2606                      var->constant_initializer, var->type);
2607    }
2608 }
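/* Usage sketch (illustrative): after sizing nir_var_mem_constant variables
 * with nir_lower_vars_to_explicit_types, the backing buffer can be filled as
 *
 *    void *data = malloc(shader->constant_data_size);
 *    nir_gather_explicit_io_initializers(shader, data,
 *                                        shader->constant_data_size,
 *                                        nir_var_mem_constant);
 */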
2609 
2610 /**
2611  * Return the offset source for a load/store intrinsic.
2612  */
2613 nir_src *
2614 nir_get_io_offset_src(nir_intrinsic_instr *instr)
2615 {
2616    switch (instr->intrinsic) {
2617    case nir_intrinsic_load_input:
2618    case nir_intrinsic_load_output:
2619    case nir_intrinsic_load_shared:
2620    case nir_intrinsic_load_task_payload:
2621    case nir_intrinsic_load_uniform:
2622    case nir_intrinsic_load_kernel_input:
2623    case nir_intrinsic_load_global:
2624    case nir_intrinsic_load_global_2x32:
2625    case nir_intrinsic_load_global_constant:
2626    case nir_intrinsic_load_scratch:
2627    case nir_intrinsic_load_fs_input_interp_deltas:
2628    case nir_intrinsic_shared_atomic_add:
2629    case nir_intrinsic_shared_atomic_and:
2630    case nir_intrinsic_shared_atomic_comp_swap:
2631    case nir_intrinsic_shared_atomic_exchange:
2632    case nir_intrinsic_shared_atomic_fadd:
2633    case nir_intrinsic_shared_atomic_fcomp_swap:
2634    case nir_intrinsic_shared_atomic_fmax:
2635    case nir_intrinsic_shared_atomic_fmin:
2636    case nir_intrinsic_shared_atomic_imax:
2637    case nir_intrinsic_shared_atomic_imin:
2638    case nir_intrinsic_shared_atomic_or:
2639    case nir_intrinsic_shared_atomic_umax:
2640    case nir_intrinsic_shared_atomic_umin:
2641    case nir_intrinsic_shared_atomic_xor:
2642    case nir_intrinsic_task_payload_atomic_add:
2643    case nir_intrinsic_task_payload_atomic_imin:
2644    case nir_intrinsic_task_payload_atomic_umin:
2645    case nir_intrinsic_task_payload_atomic_imax:
2646    case nir_intrinsic_task_payload_atomic_umax:
2647    case nir_intrinsic_task_payload_atomic_and:
2648    case nir_intrinsic_task_payload_atomic_or:
2649    case nir_intrinsic_task_payload_atomic_xor:
2650    case nir_intrinsic_task_payload_atomic_exchange:
2651    case nir_intrinsic_task_payload_atomic_comp_swap:
2652    case nir_intrinsic_task_payload_atomic_fadd:
2653    case nir_intrinsic_task_payload_atomic_fmin:
2654    case nir_intrinsic_task_payload_atomic_fmax:
2655    case nir_intrinsic_task_payload_atomic_fcomp_swap:
2656    case nir_intrinsic_global_atomic_add:
2657    case nir_intrinsic_global_atomic_and:
2658    case nir_intrinsic_global_atomic_comp_swap:
2659    case nir_intrinsic_global_atomic_exchange:
2660    case nir_intrinsic_global_atomic_fadd:
2661    case nir_intrinsic_global_atomic_fcomp_swap:
2662    case nir_intrinsic_global_atomic_fmax:
2663    case nir_intrinsic_global_atomic_fmin:
2664    case nir_intrinsic_global_atomic_imax:
2665    case nir_intrinsic_global_atomic_imin:
2666    case nir_intrinsic_global_atomic_or:
2667    case nir_intrinsic_global_atomic_umax:
2668    case nir_intrinsic_global_atomic_umin:
2669    case nir_intrinsic_global_atomic_xor:
2670       return &instr->src[0];
2671    case nir_intrinsic_load_ubo:
2672    case nir_intrinsic_load_ssbo:
2673    case nir_intrinsic_load_input_vertex:
2674    case nir_intrinsic_load_per_vertex_input:
2675    case nir_intrinsic_load_per_vertex_output:
2676    case nir_intrinsic_load_per_primitive_output:
2677    case nir_intrinsic_load_interpolated_input:
2678    case nir_intrinsic_store_output:
2679    case nir_intrinsic_store_shared:
2680    case nir_intrinsic_store_task_payload:
2681    case nir_intrinsic_store_global:
2682    case nir_intrinsic_store_global_2x32:
2683    case nir_intrinsic_store_scratch:
2684    case nir_intrinsic_ssbo_atomic_add:
2685    case nir_intrinsic_ssbo_atomic_imin:
2686    case nir_intrinsic_ssbo_atomic_umin:
2687    case nir_intrinsic_ssbo_atomic_imax:
2688    case nir_intrinsic_ssbo_atomic_umax:
2689    case nir_intrinsic_ssbo_atomic_and:
2690    case nir_intrinsic_ssbo_atomic_or:
2691    case nir_intrinsic_ssbo_atomic_xor:
2692    case nir_intrinsic_ssbo_atomic_exchange:
2693    case nir_intrinsic_ssbo_atomic_comp_swap:
2694    case nir_intrinsic_ssbo_atomic_fadd:
2695    case nir_intrinsic_ssbo_atomic_fmin:
2696    case nir_intrinsic_ssbo_atomic_fmax:
2697    case nir_intrinsic_ssbo_atomic_fcomp_swap:
2698       return &instr->src[1];
2699    case nir_intrinsic_store_ssbo:
2700    case nir_intrinsic_store_per_vertex_output:
2701    case nir_intrinsic_store_per_primitive_output:
2702       return &instr->src[2];
2703    default:
2704       return NULL;
2705    }
2706 }
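/* Hedged example: a pass that only wants to handle direct (constant-offset)
 * input/output access can use this helper together with nir_src_is_const():
 *
 *    nir_src *off = nir_get_io_offset_src(intrin);
 *    if (off && nir_src_is_const(*off)) {
 *       unsigned slot = nir_intrinsic_base(intrin) + nir_src_as_uint(*off);
 *       ...
 *    }
 */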
2707 
2708 /**
2709  * Return the vertex/primitive index source for an arrayed (per_vertex or per_primitive) load/store intrinsic.
2710  */
2711 nir_src *
2712 nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr)
2713 {
2714    switch (instr->intrinsic) {
2715    case nir_intrinsic_load_per_vertex_input:
2716    case nir_intrinsic_load_per_vertex_output:
2717    case nir_intrinsic_load_per_primitive_output:
2718       return &instr->src[0];
2719    case nir_intrinsic_store_per_vertex_output:
2720    case nir_intrinsic_store_per_primitive_output:
2721       return &instr->src[1];
2722    default:
2723       return NULL;
2724    }
2725 }
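/* Hedged example (handle_direct_vertex is a hypothetical callback): for
 * load_per_vertex_input(vertex_index, offset), src[0] is the vertex index and
 * the offset source is still found with nir_get_io_offset_src():
 *
 *    nir_src *idx = nir_get_io_arrayed_index_src(intrin);
 *    if (idx && nir_src_is_const(*idx))
 *       handle_direct_vertex(nir_src_as_uint(*idx));
 */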
2726 
2727 /**
2728  * Return the numeric constant that identifies a NULL pointer for each address
2729  * format.
2730  */
2731 const nir_const_value *
2732 nir_address_format_null_value(nir_address_format addr_format)
2733 {
2734    static const nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
2735       [nir_address_format_32bit_global] = {{0}},
2736       [nir_address_format_2x32bit_global] = {{0}},
2737       [nir_address_format_64bit_global] = {{0}},
2738       [nir_address_format_64bit_global_32bit_offset] = {{0}},
2739       [nir_address_format_64bit_bounded_global] = {{0}},
2740       [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
2741       [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}},
2742       [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}},
2743       [nir_address_format_32bit_offset] = {{.u32 = ~0}},
2744       [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}},
2745       [nir_address_format_62bit_generic] = {{.u64 = 0}},
2746       [nir_address_format_logical] = {{.u32 = ~0}},
2747    };
2748 
2749    assert(addr_format < ARRAY_SIZE(null_values));
2750    return null_values[addr_format];
2751 }
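/* Hedged sketch: a lowering pass can materialize the NULL pointer for a given
 * address format as an immediate, e.g.:
 *
 *    const nir_const_value *null_val = nir_address_format_null_value(fmt);
 *    nir_ssa_def *null_ptr =
 *       nir_build_imm(b, nir_address_format_num_components(fmt),
 *                     nir_address_format_bit_size(fmt), null_val);
 */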
2752 
2753 nir_ssa_def *
2754 nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
2755                    nir_address_format addr_format)
2756 {
2757    switch (addr_format) {
2758    case nir_address_format_32bit_global:
2759    case nir_address_format_2x32bit_global:
2760    case nir_address_format_64bit_global:
2761    case nir_address_format_64bit_bounded_global:
2762    case nir_address_format_32bit_index_offset:
2763    case nir_address_format_vec2_index_32bit_offset:
2764    case nir_address_format_32bit_offset:
2765    case nir_address_format_62bit_generic:
2766       return nir_ball_iequal(b, addr0, addr1);
2767 
2768    case nir_address_format_64bit_global_32bit_offset:
2769       return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
2770                                 nir_channels(b, addr1, 0xb));
2771 
2772    case nir_address_format_32bit_offset_as_64bit:
2773       assert(addr0->num_components == 1 && addr1->num_components == 1);
2774       return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
2775 
2776    case nir_address_format_32bit_index_offset_pack64:
2777       assert(addr0->num_components == 1 && addr1->num_components == 1);
2778       return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
2779 
2780    case nir_address_format_logical:
2781       unreachable("Unsupported address format");
2782    }
2783 
2784    unreachable("Invalid address format");
2785 }
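/* Hedged sketch: together with the null value above, a "ptr == NULL" test can
 * be lowered as
 *
 *    nir_ssa_def *is_null = nir_build_addr_ieq(b, addr, null_ptr, fmt);
 *
 * where null_ptr is the immediate built from nir_address_format_null_value().
 */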
2786 
2787 nir_ssa_def *
2788 nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
2789                     nir_address_format addr_format)
2790 {
2791    switch (addr_format) {
2792    case nir_address_format_32bit_global:
2793    case nir_address_format_64bit_global:
2794    case nir_address_format_32bit_offset:
2795    case nir_address_format_32bit_index_offset_pack64:
2796    case nir_address_format_62bit_generic:
2797       assert(addr0->num_components == 1);
2798       assert(addr1->num_components == 1);
2799       return nir_isub(b, addr0, addr1);
2800 
2801    case nir_address_format_2x32bit_global:
2802       return nir_isub(b, addr_to_global(b, addr0, addr_format),
2803                          addr_to_global(b, addr1, addr_format));
2804 
2805    case nir_address_format_32bit_offset_as_64bit:
2806       assert(addr0->num_components == 1);
2807       assert(addr1->num_components == 1);
2808       return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
2809 
2810    case nir_address_format_64bit_global_32bit_offset:
2811    case nir_address_format_64bit_bounded_global:
2812       return nir_isub(b, addr_to_global(b, addr0, addr_format),
2813                          addr_to_global(b, addr1, addr_format));
2814 
2815    case nir_address_format_32bit_index_offset:
2816       assert(addr0->num_components == 2);
2817       assert(addr1->num_components == 2);
2818       /* Assume the same buffer index. */
2819       return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
2820 
2821    case nir_address_format_vec2_index_32bit_offset:
2822       assert(addr0->num_components == 3);
2823       assert(addr1->num_components == 3);
2824       /* Assume the same buffer index. */
2825       return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
2826 
2827    case nir_address_format_logical:
2828       unreachable("Unsupported address format");
2829    }
2830 
2831    unreachable("Invalid address format");
2832 }
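/* Hedged sketch: the byte distance between two addresses of the same format
 * can be computed as
 *
 *    nir_ssa_def *diff = nir_build_addr_isub(b, p, q, fmt);
 *
 * Note that the index+offset formats only subtract the offsets, so the result
 * is meaningful only when both addresses refer to the same buffer (see the
 * "Assume the same buffer index" comments above).
 */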
2833 
2834 static bool
2835 is_input(nir_intrinsic_instr *intrin)
2836 {
2837    return intrin->intrinsic == nir_intrinsic_load_input ||
2838           intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
2839           intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
2840           intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
2841 }
2842 
2843 static bool
2844 is_output(nir_intrinsic_instr *intrin)
2845 {
2846    return intrin->intrinsic == nir_intrinsic_load_output ||
2847           intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
2848           intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
2849           intrin->intrinsic == nir_intrinsic_store_output ||
2850           intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2851           intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
2852 }
2853 
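/* A 64-bit value with three or more components (e.g. a dvec3 or dvec4)
 * occupies two vec4 varying slots.
 */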
2854 static bool is_dual_slot(nir_intrinsic_instr *intrin)
2855 {
2856    if (intrin->intrinsic == nir_intrinsic_store_output ||
2857        intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2858        intrin->intrinsic == nir_intrinsic_store_per_primitive_output) {
2859       return nir_src_bit_size(intrin->src[0]) == 64 &&
2860              nir_src_num_components(intrin->src[0]) >= 3;
2861    }
2862 
2863    return nir_dest_bit_size(intrin->dest) == 64 &&
2864           nir_dest_num_components(intrin->dest) >= 3;
2865 }
2866 
2867 /**
2868  * This pass adds constant offsets to instr->const_index[0] for input/output
2869  * intrinsics, and resets the offset source to 0.  Non-constant offsets remain
2870  * unchanged - since we don't know what part of a compound variable is
2871  * accessed, we allocate storage for the entire thing. For drivers that use
2872  * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
2873  * the offset source will be 0, so that they don't have to add it in manually.
2874  */
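/* Hedged illustration with made-up numbers: a direct access such as
 *
 *    load_input(offset = const 2), base = 4, sem.location = VAR0
 *
 * becomes
 *
 *    load_input(offset = const 0), base = 6, sem.location = VAR2
 *
 * with sem.num_slots reduced to 1 (or 2 for dual-slot 64-bit values).
 */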
2875 
2876 static bool
2877 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
2878                                nir_variable_mode modes)
2879 {
2880    bool progress = false;
2881    nir_foreach_instr_safe(instr, block) {
2882       if (instr->type != nir_instr_type_intrinsic)
2883          continue;
2884 
2885       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2886 
2887       if (((modes & nir_var_shader_in) && is_input(intrin)) ||
2888           ((modes & nir_var_shader_out) && is_output(intrin))) {
2889          nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
2890 
2891          /* NV_mesh_shader: ignore MS primitive indices. */
2892          if (b->shader->info.stage == MESA_SHADER_MESH &&
2893              sem.location == VARYING_SLOT_PRIMITIVE_INDICES &&
2894              !(b->shader->info.per_primitive_outputs &
2895                BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES)))
2896             continue;
2897 
2898          nir_src *offset = nir_get_io_offset_src(intrin);
2899 
2900          /* TODO: Better handling of per-view variables here */
2901          if (nir_src_is_const(*offset) &&
2902              !nir_intrinsic_io_semantics(intrin).per_view) {
2903             unsigned off = nir_src_as_uint(*offset);
2904 
2905             nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
2906 
2907             sem.location += off;
2908             /* non-indirect indexing should reduce num_slots */
2909             sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
2910             nir_intrinsic_set_io_semantics(intrin, sem);
2911 
2912             b->cursor = nir_before_instr(&intrin->instr);
2913             nir_instr_rewrite_src(&intrin->instr, offset,
2914                                   nir_src_for_ssa(nir_imm_int(b, 0)));
2915             progress = true;
2916          }
2917       }
2918    }
2919 
2920    return progress;
2921 }
2922 
2923 bool
2924 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
2925 {
2926    bool progress = false;
2927 
2928    nir_foreach_function(f, nir) {
2929       if (f->impl) {
2930          bool impl_progress = false;
2931          nir_builder b;
2932          nir_builder_init(&b, f->impl);
2933          nir_foreach_block(block, f->impl) {
2934             impl_progress |= add_const_offset_to_base_block(block, &b, modes);
2935          }
2936          progress |= impl_progress;
2937          if (impl_progress)
2938             nir_metadata_preserve(f->impl, nir_metadata_block_index | nir_metadata_dominance);
2939          else
2940             nir_metadata_preserve(f->impl, nir_metadata_all);
2941       }
2942    }
2943 
2944    return progress;
2945 }
2946 
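/* Replace fragment-shader loads of the legacy color inputs (VARYING_SLOT_COL0
 * and COL1) with load_color0/load_color1 intrinsics and record their
 * interpolation qualifiers in shader_info.fs.
 */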
2947 static bool
2948 nir_lower_color_inputs(nir_shader *nir)
2949 {
2950    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2951    bool progress = false;
2952 
2953    nir_builder b;
2954    nir_builder_init(&b, impl);
2955 
2956    nir_foreach_block (block, impl) {
2957       nir_foreach_instr_safe (instr, block) {
2958          if (instr->type != nir_instr_type_intrinsic)
2959             continue;
2960 
2961          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2962 
2963          if (intrin->intrinsic != nir_intrinsic_load_deref)
2964             continue;
2965 
2966          nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2967          if (!nir_deref_mode_is(deref, nir_var_shader_in))
2968             continue;
2969 
2970          b.cursor = nir_before_instr(instr);
2971          nir_variable *var = nir_deref_instr_get_variable(deref);
2972          nir_ssa_def *def;
2973 
2974          if (var->data.location == VARYING_SLOT_COL0) {
2975             def = nir_load_color0(&b);
2976             nir->info.fs.color0_interp = var->data.interpolation;
2977             nir->info.fs.color0_sample = var->data.sample;
2978             nir->info.fs.color0_centroid = var->data.centroid;
2979          } else if (var->data.location == VARYING_SLOT_COL1) {
2980             def = nir_load_color1(&b);
2981             nir->info.fs.color1_interp = var->data.interpolation;
2982             nir->info.fs.color1_sample = var->data.sample;
2983             nir->info.fs.color1_centroid = var->data.centroid;
2984          } else {
2985             continue;
2986          }
2987 
2988          nir_ssa_def_rewrite_uses(&intrin->dest.ssa, def);
2989          nir_instr_remove(instr);
2990          progress = true;
2991       }
2992    }
2993 
2994    if (progress) {
2995       nir_metadata_preserve(impl, nir_metadata_dominance |
2996                                   nir_metadata_block_index);
2997    } else {
2998       nir_metadata_preserve(impl, nir_metadata_all);
2999    }
3000    return progress;
3001 }
3002 
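/* Attach transform-feedback information from nir->xfb_info to the output
 * intrinsics themselves (the io_xfb/io_xfb2 indices) and fill
 * info.xfb_stride, so later passes can emit XFB without consulting variables.
 */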
3003 bool
3004 nir_io_add_intrinsic_xfb_info(nir_shader *nir)
3005 {
3006    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3007    bool progress = false;
3008 
3009    for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++)
3010       nir->info.xfb_stride[i] = nir->xfb_info->buffers[i].stride / 4;
3011 
3012    nir_foreach_block (block, impl) {
3013       nir_foreach_instr_safe (instr, block) {
3014          if (instr->type != nir_instr_type_intrinsic)
3015             continue;
3016 
3017          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3018 
3019          if (!nir_intrinsic_has_io_xfb(intr))
3020             continue;
3021 
3022          /* No indirect indexing allowed. The index is implied to be 0. */
3023          ASSERTED nir_src offset = *nir_get_io_offset_src(intr);
3024          assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
3025 
3026          /* Calling this pass a second time shouldn't do anything. */
3027          if (nir_intrinsic_io_xfb(intr).out[0].num_components ||
3028              nir_intrinsic_io_xfb(intr).out[1].num_components ||
3029              nir_intrinsic_io_xfb2(intr).out[0].num_components ||
3030              nir_intrinsic_io_xfb2(intr).out[1].num_components)
3031             continue;
3032 
3033          nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3034          unsigned writemask = nir_intrinsic_write_mask(intr) <<
3035                             nir_intrinsic_component(intr);
3036 
3037          nir_io_xfb xfb[2];
3038          memset(xfb, 0, sizeof(xfb));
3039 
3040          for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
3041             nir_xfb_output_info *out = &nir->xfb_info->outputs[i];
3042             if (out->location == sem.location) {
3043                unsigned xfb_mask = writemask & out->component_mask;
3044 
3045                /*fprintf(stdout, "output%u: buffer=%u, offset=%u, location=%u, "
3046                            "component_offset=%u, component_mask=0x%x, xfb_mask=0x%x, slots=%u\n",
3047                        i, out->buffer,
3048                        out->offset,
3049                        out->location,
3050                        out->component_offset,
3051                        out->component_mask,
3052                        xfb_mask, sem.num_slots);*/
3053 
3054                while (xfb_mask) {
3055                   int start, count;
3056                   u_bit_scan_consecutive_range(&xfb_mask, &start, &count);
3057 
3058                   xfb[start / 2].out[start % 2].num_components = count;
3059                   xfb[start / 2].out[start % 2].buffer = out->buffer;
3060                   /* out->offset is relative to the first stored xfb component */
3061                   /* start is relative to component 0 */
3062                   xfb[start / 2].out[start % 2].offset =
3063                      out->offset / 4 - out->component_offset + start;
3064 
3065                   progress = true;
3066                }
3067             }
3068          }
3069 
3070          nir_intrinsic_set_io_xfb(intr, xfb[0]);
3071          nir_intrinsic_set_io_xfb2(intr, xfb[1]);
3072       }
3073    }
3074 
3075    nir_metadata_preserve(impl, nir_metadata_all);
3076    return progress;
3077 }
3078 
3079 static int
3080 type_size_vec4(const struct glsl_type *type, bool bindless)
3081 {
3082    return glsl_count_attribute_slots(type, false);
3083 }
3084 
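/* Convenience entry point for drivers that set lower_io_variables: lower
 * indirect access through temporaries where unsupported, lower in/out
 * variables to IO intrinsics, fold constant offsets into the base, remove
 * dead variables, and attach XFB info to the output intrinsics.
 */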
3085 void
3086 nir_lower_io_passes(nir_shader *nir)
3087 {
3088    if (!nir->options->lower_io_variables)
3089       return;
3090 
3091    bool has_indirect_inputs =
3092       (nir->options->support_indirect_inputs >> nir->info.stage) & 0x1;
3093 
3094    /* Transform feedback requires that indirect outputs are lowered. */
3095    bool has_indirect_outputs =
3096       (nir->options->support_indirect_outputs >> nir->info.stage) & 0x1 &&
3097       nir->xfb_info == NULL;
3098 
3099    if (!has_indirect_inputs || !has_indirect_outputs) {
3100       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
3101                  nir_shader_get_entrypoint(nir), !has_indirect_outputs,
3102                  !has_indirect_inputs);
3103 
3104       /* We need to lower all the copy_derefs introduced by
3105        * nir_lower_io_to_temporaries before calling nir_lower_io.
3106        */
3107       NIR_PASS_V(nir, nir_split_var_copies);
3108       NIR_PASS_V(nir, nir_lower_var_copies);
3109       NIR_PASS_V(nir, nir_lower_global_vars_to_local);
3110    }
3111 
3112    if (nir->info.stage == MESA_SHADER_FRAGMENT &&
3113        nir->options->lower_fs_color_inputs)
3114       NIR_PASS_V(nir, nir_lower_color_inputs);
3115 
3116    NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
3117               type_size_vec4, nir_lower_io_lower_64bit_to_32);
3118 
3119    /* nir_io_add_const_offset_to_base needs actual constants. */
3120    NIR_PASS_V(nir, nir_opt_constant_folding);
3121    NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
3122                                                     nir_var_shader_out);
3123 
3124    /* Lower and remove dead derefs and variables to clean up the IR. */
3125    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3126    NIR_PASS_V(nir, nir_opt_dce);
3127    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp |
3128               nir_var_shader_in | nir_var_shader_out, NULL);
3129 
3130    if (nir->xfb_info)
3131       NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
3132 
3133    nir->info.io_lowered = true;
3134 }
3135