1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /*
25  * This lowering pass converts loads and stores of input/output variable
26  * dereferences into the corresponding input/output intrinsics.
27  */
28 
29 #include "nir.h"
30 #include "nir_builder.h"
31 #include "nir_deref.h"
32 #include "nir_xfb_info.h"
33 
34 #include "util/u_math.h"
35 
36 struct lower_io_state {
37    void *dead_ctx;
38    nir_builder builder;
39    int (*type_size)(const struct glsl_type *type, bool);
40    nir_variable_mode modes;
41    nir_lower_io_options options;
42 };
43 
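/* The helpers below map the generic deref-based atomic intrinsics onto the
 * mode-specific atomic opcodes (SSBO, global, shared, task payload).
 */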
44 static nir_intrinsic_op
45 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
46 {
47    switch (deref_op) {
48    case nir_intrinsic_deref_atomic:
49       return nir_intrinsic_ssbo_atomic;
50    case nir_intrinsic_deref_atomic_swap:
51       return nir_intrinsic_ssbo_atomic_swap;
52    default:
53       unreachable("Invalid SSBO atomic");
54    }
55 }
56 
57 static nir_intrinsic_op
58 global_atomic_for_deref(nir_address_format addr_format,
59                         nir_intrinsic_op deref_op)
60 {
61    switch (deref_op) {
62    case nir_intrinsic_deref_atomic:
63       if (addr_format != nir_address_format_2x32bit_global)
64          return nir_intrinsic_global_atomic;
65       else
66          return nir_intrinsic_global_atomic_2x32;
67 
68    case nir_intrinsic_deref_atomic_swap:
69       if (addr_format != nir_address_format_2x32bit_global)
70          return nir_intrinsic_global_atomic_swap;
71       else
72          return nir_intrinsic_global_atomic_swap_2x32;
73 
74    default:
75       unreachable("Invalid global atomic");
76    }
77 }
78 
79 static nir_intrinsic_op
80 shared_atomic_for_deref(nir_intrinsic_op deref_op)
81 {
82    switch (deref_op) {
83    case nir_intrinsic_deref_atomic:
84       return nir_intrinsic_shared_atomic;
85    case nir_intrinsic_deref_atomic_swap:
86       return nir_intrinsic_shared_atomic_swap;
87    default:
88       unreachable("Invalid shared atomic");
89    }
90 }
91 
92 static nir_intrinsic_op
93 task_payload_atomic_for_deref(nir_intrinsic_op deref_op)
94 {
95    switch (deref_op) {
96    case nir_intrinsic_deref_atomic:
97       return nir_intrinsic_task_payload_atomic;
98    case nir_intrinsic_deref_atomic_swap:
99       return nir_intrinsic_task_payload_atomic_swap;
100    default:
101       unreachable("Invalid task payload atomic");
102    }
103 }
104 
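/* Assign consecutive driver_location values to every variable of the given
 * mode, measuring each variable with the driver-provided type_size callback,
 * and return the total size (in the callback's units) through *size.
 */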
105 void
106 nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
107                          unsigned *size,
108                          int (*type_size)(const struct glsl_type *, bool))
109 {
110    unsigned location = 0;
111 
112    nir_foreach_variable_with_modes(var, shader, mode) {
113       var->data.driver_location = location;
114       bool bindless_type_size = var->data.mode == nir_var_shader_in ||
115                                 var->data.mode == nir_var_shader_out ||
116                                 var->data.bindless;
117       location += type_size(var->type, bindless_type_size);
118    }
119 
120    *size = location;
121 }
122 
123 /**
124  * Some inputs and outputs are arrayed, meaning that there is an extra level
125  * of array indexing to handle mismatches between the shader interface and the
126  * dispatch pattern of the shader.  For instance, geometry shaders are
127  * executed per-primitive while their inputs and outputs are specified
128  * per-vertex so all inputs and outputs have to be additionally indexed with
129  * the vertex index within the primitive.
130  */
131 bool
132 nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
133 {
134    if (var->data.patch || !glsl_type_is_array(var->type))
135       return false;
136 
137    if (stage == MESA_SHADER_MESH) {
138       /* NV_mesh_shader: this is a flat array for the whole workgroup. */
139       if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
140          return var->data.per_primitive;
141    }
142 
143    if (var->data.mode == nir_var_shader_in) {
144       if (var->data.per_vertex) {
145          assert(stage == MESA_SHADER_FRAGMENT);
146          return true;
147       }
148 
149       return stage == MESA_SHADER_GEOMETRY ||
150              stage == MESA_SHADER_TESS_CTRL ||
151              stage == MESA_SHADER_TESS_EVAL;
152    }
153 
154    if (var->data.mode == nir_var_shader_out)
155       return stage == MESA_SHADER_TESS_CTRL ||
156              stage == MESA_SHADER_MESH;
157 
158    return false;
159 }
160 
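/* With nir_lower_io_lower_64bit_to_32_new, a dual-slot (64-bit) vertex input
 * is counted as a single location; the upper dvec2 half of each slot is
 * fetched by setting io_semantics.high_dvec2 on the load rather than by
 * advancing to a second slot.
 */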
161 static bool
162 uses_high_dvec2_semantic(struct lower_io_state *state,
163                          const nir_variable *var)
164 {
165    return state->builder.shader->info.stage == MESA_SHADER_VERTEX &&
166           state->options & nir_lower_io_lower_64bit_to_32_new &&
167           var->data.mode == nir_var_shader_in &&
168           glsl_type_is_dual_slot(glsl_without_array(var->type));
169 }
170 
171 static unsigned
172 get_number_of_slots(struct lower_io_state *state,
173                     const nir_variable *var)
174 {
175    const struct glsl_type *type = var->type;
176 
177    if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
178       assert(glsl_type_is_array(type));
179       type = glsl_get_array_element(type);
180    }
181 
182    /* NV_mesh_shader:
183     * PRIMITIVE_INDICES is a flat array, not a proper arrayed output,
184     * as opposed to D3D-style mesh shaders where it's addressed by
185     * the primitive index.
186     * Return a single slot for it instead of letting type_size
187     * count one slot per array element.
188     */
189    if (state->builder.shader->info.stage == MESA_SHADER_MESH &&
190        var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
191        !nir_is_arrayed_io(var, state->builder.shader->info.stage))
192       return 1;
193 
194    return state->type_size(type, var->data.bindless) /
195           (uses_high_dvec2_semantic(state, var) ? 2 : 1);
196 }
197 
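/* Walk the deref chain and build an SSA offset (in type_size units) from the
 * start of the variable.  For arrayed I/O the outermost array index is
 * returned separately through array_index, and compact arrays with constant
 * indices are folded into a slot offset plus a component.
 */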
198 static nir_def *
199 get_io_offset(nir_builder *b, nir_deref_instr *deref,
200               nir_def **array_index,
201               int (*type_size)(const struct glsl_type *, bool),
202               unsigned *component, bool bts)
203 {
204    nir_deref_path path;
205    nir_deref_path_init(&path, deref, NULL);
206 
207    assert(path.path[0]->deref_type == nir_deref_type_var);
208    nir_deref_instr **p = &path.path[1];
209 
210    /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
211     * inputs), skip the outermost array index.  Process the rest normally.
212     */
213    if (array_index != NULL) {
214       assert((*p)->deref_type == nir_deref_type_array);
215       *array_index = (*p)->arr.index.ssa;
216       p++;
217    }
218 
219    if (path.path[0]->var->data.compact && nir_src_is_const((*p)->arr.index)) {
220       assert((*p)->deref_type == nir_deref_type_array);
221       assert(glsl_type_is_scalar((*p)->type));
222 
223       /* We always lower indirect dereferences for "compact" array vars. */
224       const unsigned index = nir_src_as_uint((*p)->arr.index);
225       const unsigned total_offset = *component + index;
226       const unsigned slot_offset = total_offset / 4;
227       *component = total_offset % 4;
228       return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
229    }
230 
231    /* Just emit code and let constant-folding go to town */
232    nir_def *offset = nir_imm_int(b, 0);
233 
234    for (; *p; p++) {
235       if ((*p)->deref_type == nir_deref_type_array) {
236          unsigned size = type_size((*p)->type, bts);
237 
238          nir_def *mul =
239             nir_amul_imm(b, (*p)->arr.index.ssa, size);
240 
241          offset = nir_iadd(b, offset, mul);
242       } else if ((*p)->deref_type == nir_deref_type_struct) {
243          /* p starts at path[1], so this is safe */
244          nir_deref_instr *parent = *(p - 1);
245 
246          unsigned field_offset = 0;
247          for (unsigned i = 0; i < (*p)->strct.index; i++) {
248             field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
249          }
250          offset = nir_iadd_imm(b, offset, field_offset);
251       } else {
252          unreachable("Unsupported deref type");
253       }
254    }
255 
256    nir_deref_path_finish(&path);
257 
258    return offset;
259 }
260 
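/* Emit the load intrinsic for a lowered input/output/uniform access: the
 * opcode is chosen from the variable mode and shader stage, then base,
 * range, component and io_semantics are filled in from the variable.
 */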
261 static nir_def *
262 emit_load(struct lower_io_state *state,
263           nir_def *array_index, nir_variable *var, nir_def *offset,
264           unsigned component, unsigned num_components, unsigned bit_size,
265           nir_alu_type dest_type, bool high_dvec2)
266 {
267    nir_builder *b = &state->builder;
268    const nir_shader *nir = b->shader;
269    nir_variable_mode mode = var->data.mode;
270    nir_def *barycentric = NULL;
271 
272    nir_intrinsic_op op;
273    switch (mode) {
274    case nir_var_shader_in:
275       if (nir->info.stage == MESA_SHADER_FRAGMENT &&
276           nir->options->use_interpolated_input_intrinsics &&
277           var->data.interpolation != INTERP_MODE_FLAT &&
278           !var->data.per_primitive) {
279          if (var->data.interpolation == INTERP_MODE_EXPLICIT ||
280              var->data.per_vertex) {
281             assert(array_index != NULL);
282             op = nir_intrinsic_load_input_vertex;
283          } else {
284             assert(array_index == NULL);
285 
286             nir_intrinsic_op bary_op;
287             if (var->data.sample)
288                bary_op = nir_intrinsic_load_barycentric_sample;
289             else if (var->data.centroid)
290                bary_op = nir_intrinsic_load_barycentric_centroid;
291             else
292                bary_op = nir_intrinsic_load_barycentric_pixel;
293 
294             barycentric = nir_load_barycentric(&state->builder, bary_op,
295                                                var->data.interpolation);
296             op = nir_intrinsic_load_interpolated_input;
297          }
298       } else {
299          op = array_index ? nir_intrinsic_load_per_vertex_input : nir_intrinsic_load_input;
300       }
301       break;
302    case nir_var_shader_out:
303       op = !array_index ? nir_intrinsic_load_output : var->data.per_primitive ? nir_intrinsic_load_per_primitive_output
304                                                                               : nir_intrinsic_load_per_vertex_output;
305       break;
306    case nir_var_uniform:
307       op = nir_intrinsic_load_uniform;
308       break;
309    default:
310       unreachable("Unknown variable mode");
311    }
312 
313    nir_intrinsic_instr *load =
314       nir_intrinsic_instr_create(state->builder.shader, op);
315    load->num_components = num_components;
316 
317    nir_intrinsic_set_base(load, var->data.driver_location);
318    if (nir_intrinsic_has_range(load)) {
319       const struct glsl_type *type = var->type;
320       if (array_index)
321          type = glsl_get_array_element(type);
322       unsigned var_size = state->type_size(type, var->data.bindless);
323       nir_intrinsic_set_range(load, var_size);
324    }
325 
326    if (mode == nir_var_shader_in || mode == nir_var_shader_out)
327       nir_intrinsic_set_component(load, component);
328 
329    if (nir_intrinsic_has_access(load))
330       nir_intrinsic_set_access(load, var->data.access);
331 
332    nir_intrinsic_set_dest_type(load, dest_type);
333 
334    if (load->intrinsic != nir_intrinsic_load_uniform) {
335       nir_io_semantics semantics = { 0 };
336       semantics.location = var->data.location;
337       semantics.num_slots = get_number_of_slots(state, var);
338       semantics.fb_fetch_output = var->data.fb_fetch_output;
339       semantics.medium_precision =
340          var->data.precision == GLSL_PRECISION_MEDIUM ||
341          var->data.precision == GLSL_PRECISION_LOW;
342       semantics.high_dvec2 = high_dvec2;
343       nir_intrinsic_set_io_semantics(load, semantics);
344    }
345 
346    if (array_index) {
347       load->src[0] = nir_src_for_ssa(array_index);
348       load->src[1] = nir_src_for_ssa(offset);
349    } else if (barycentric) {
350       load->src[0] = nir_src_for_ssa(barycentric);
351       load->src[1] = nir_src_for_ssa(offset);
352    } else {
353       load->src[0] = nir_src_for_ssa(offset);
354    }
355 
356    nir_def_init(&load->instr, &load->def, num_components, bit_size);
357    nir_builder_instr_insert(b, &load->instr);
358 
359    return &load->def;
360 }
361 
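/* Lower a load_deref.  64-bit loads are optionally split into pairs of
 * 32-bit loads that are repacked with pack_64_2x32, and booleans are loaded
 * as 32-bit values and converted back to 1-bit.
 */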
362 static nir_def *
363 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
364            nir_def *array_index, nir_variable *var, nir_def *offset,
365            unsigned component, const struct glsl_type *type)
366 {
367    const bool lower_double = !glsl_type_is_integer(type) && state->options & nir_lower_io_lower_64bit_float_to_32;
368    if (intrin->def.bit_size == 64 &&
369        (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32_new |
370                                            nir_lower_io_lower_64bit_to_32)))) {
371       nir_builder *b = &state->builder;
372       bool use_high_dvec2_semantic = uses_high_dvec2_semantic(state, var);
373 
374       /* Each slot is a dual slot, so divide the offset within the variable
375        * by 2.
376        */
377       if (use_high_dvec2_semantic)
378          offset = nir_ushr_imm(b, offset, 1);
379 
380       const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
381 
382       nir_def *comp64[4];
383       assert(component == 0 || component == 2);
384       unsigned dest_comp = 0;
385       bool high_dvec2 = false;
386       while (dest_comp < intrin->def.num_components) {
387          const unsigned num_comps =
388             MIN2(intrin->def.num_components - dest_comp,
389                  (4 - component) / 2);
390 
391          nir_def *data32 =
392             emit_load(state, array_index, var, offset, component,
393                       num_comps * 2, 32, nir_type_uint32, high_dvec2);
394          for (unsigned i = 0; i < num_comps; i++) {
395             comp64[dest_comp + i] =
396                nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
397          }
398 
399          /* Only the first load has a component offset */
400          component = 0;
401          dest_comp += num_comps;
402 
403          if (use_high_dvec2_semantic) {
404             /* Increment the offset when we wrap around the dual slot. */
405             if (high_dvec2)
406                offset = nir_iadd_imm(b, offset, slot_size);
407             high_dvec2 = !high_dvec2;
408          } else {
409             offset = nir_iadd_imm(b, offset, slot_size);
410          }
411       }
412 
413       return nir_vec(b, comp64, intrin->def.num_components);
414    } else if (intrin->def.bit_size == 1) {
415       /* Booleans are 32-bit */
416       assert(glsl_type_is_boolean(type));
417       return nir_b2b1(&state->builder,
418                       emit_load(state, array_index, var, offset, component,
419                                 intrin->def.num_components, 32,
420                                 nir_type_bool32, false));
421    } else {
422       return emit_load(state, array_index, var, offset, component,
423                        intrin->def.num_components,
424                        intrin->def.bit_size,
425                        nir_get_nir_type_for_glsl_type(type), false);
426    }
427 }
428 
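/* Emit a store_output (or per-vertex/per-primitive variant) with the write
 * mask, geometry-shader stream assignment and io_semantics derived from the
 * variable.
 */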
429 static void
430 emit_store(struct lower_io_state *state, nir_def *data,
431            nir_def *array_index, nir_variable *var, nir_def *offset,
432            unsigned component, unsigned num_components,
433            nir_component_mask_t write_mask, nir_alu_type src_type)
434 {
435    nir_builder *b = &state->builder;
436 
437    assert(var->data.mode == nir_var_shader_out);
438    nir_intrinsic_op op =
439       !array_index ? nir_intrinsic_store_output : var->data.per_primitive ? nir_intrinsic_store_per_primitive_output
440                                                                           : nir_intrinsic_store_per_vertex_output;
441 
442    nir_intrinsic_instr *store =
443       nir_intrinsic_instr_create(state->builder.shader, op);
444    store->num_components = num_components;
445 
446    store->src[0] = nir_src_for_ssa(data);
447 
448    const struct glsl_type *type = var->type;
449    if (array_index)
450       type = glsl_get_array_element(type);
451    unsigned var_size = state->type_size(type, var->data.bindless);
452    nir_intrinsic_set_base(store, var->data.driver_location);
453    nir_intrinsic_set_range(store, var_size);
454    nir_intrinsic_set_component(store, component);
455    nir_intrinsic_set_src_type(store, src_type);
456 
457    nir_intrinsic_set_write_mask(store, write_mask);
458 
459    if (nir_intrinsic_has_access(store))
460       nir_intrinsic_set_access(store, var->data.access);
461 
462    if (array_index)
463       store->src[1] = nir_src_for_ssa(array_index);
464 
465    store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);
466 
467    unsigned gs_streams = 0;
468    if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
469       if (var->data.stream & NIR_STREAM_PACKED) {
470          gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
471       } else {
472          assert(var->data.stream < 4);
473          gs_streams = 0;
474          for (unsigned i = 0; i < num_components; ++i)
475             gs_streams |= var->data.stream << (2 * i);
476       }
477    }
478 
479    nir_io_semantics semantics = { 0 };
480    semantics.location = var->data.location;
481    semantics.num_slots = get_number_of_slots(state, var);
482    semantics.dual_source_blend_index = var->data.index;
483    semantics.gs_streams = gs_streams;
484    semantics.medium_precision =
485       var->data.precision == GLSL_PRECISION_MEDIUM ||
486       var->data.precision == GLSL_PRECISION_LOW;
487    semantics.per_view = var->data.per_view;
488    semantics.invariant = var->data.invariant;
489 
490    nir_intrinsic_set_io_semantics(store, semantics);
491 
492    nir_builder_instr_insert(b, &store->instr);
493 }
494 
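/* Lower a store_deref, mirroring lower_load: 64-bit stores are split into
 * 32-bit stores with a widened write mask, and booleans are converted to
 * 32-bit values first.
 */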
495 static void
496 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
497             nir_def *array_index, nir_variable *var, nir_def *offset,
498             unsigned component, const struct glsl_type *type)
499 {
500    const bool lower_double = !glsl_type_is_integer(type) && state->options & nir_lower_io_lower_64bit_float_to_32;
501    if (intrin->src[1].ssa->bit_size == 64 &&
502        (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32 |
503                                            nir_lower_io_lower_64bit_to_32_new)))) {
504       nir_builder *b = &state->builder;
505 
506       const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
507 
508       assert(component == 0 || component == 2);
509       unsigned src_comp = 0;
510       nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
511       while (src_comp < intrin->num_components) {
512          const unsigned num_comps =
513             MIN2(intrin->num_components - src_comp,
514                  (4 - component) / 2);
515 
516          if (write_mask & BITFIELD_MASK(num_comps)) {
517             nir_def *data =
518                nir_channels(b, intrin->src[1].ssa,
519                             BITFIELD_RANGE(src_comp, num_comps));
520             nir_def *data32 = nir_bitcast_vector(b, data, 32);
521 
522             uint32_t write_mask32 = 0;
523             for (unsigned i = 0; i < num_comps; i++) {
524                if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
525                   write_mask32 |= 3 << (i * 2);
526             }
527 
528             emit_store(state, data32, array_index, var, offset,
529                        component, data32->num_components, write_mask32,
530                        nir_type_uint32);
531          }
532 
533          /* Only the first store has a component offset */
534          component = 0;
535          src_comp += num_comps;
536          write_mask >>= num_comps;
537          offset = nir_iadd_imm(b, offset, slot_size);
538       }
539    } else if (intrin->src[1].ssa->bit_size == 1) {
540       /* Booleans are 32-bit */
541       assert(glsl_type_is_boolean(type));
542       nir_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
543       emit_store(state, b32_val, array_index, var, offset,
544                  component, intrin->num_components,
545                  nir_intrinsic_write_mask(intrin),
546                  nir_type_bool32);
547    } else {
548       emit_store(state, intrin->src[1].ssa, array_index, var, offset,
549                  component, intrin->num_components,
550                  nir_intrinsic_write_mask(intrin),
551                  nir_get_nir_type_for_glsl_type(type));
552    }
553 }
554 
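/* Lower the interp_deref_at_* intrinsics to a load_barycentric_* plus
 * load_interpolated_input pair.  Flat inputs fall back to a plain load and
 * explicit (per-vertex) inputs become load_input_vertex.
 */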
555 static nir_def *
556 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
557                      nir_variable *var, nir_def *offset, unsigned component,
558                      const struct glsl_type *type)
559 {
560    nir_builder *b = &state->builder;
561    assert(var->data.mode == nir_var_shader_in);
562 
563    /* Ignore interpolateAt() for flat variables - flat is flat. Lower
564     * interpolateAtVertex() for explicit variables.
565     */
566    if (var->data.interpolation == INTERP_MODE_FLAT ||
567        var->data.interpolation == INTERP_MODE_EXPLICIT) {
568       nir_def *vertex_index = NULL;
569 
570       if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
571          assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
572          vertex_index = intrin->src[1].ssa;
573       }
574 
575       return lower_load(intrin, state, vertex_index, var, offset, component, type);
576    }
577 
578    /* None of the supported APIs allow interpolation on 64-bit things */
579    assert(intrin->def.bit_size <= 32);
580 
581    nir_intrinsic_op bary_op;
582    switch (intrin->intrinsic) {
583    case nir_intrinsic_interp_deref_at_centroid:
584       bary_op = nir_intrinsic_load_barycentric_centroid;
585       break;
586    case nir_intrinsic_interp_deref_at_sample:
587       bary_op = nir_intrinsic_load_barycentric_at_sample;
588       break;
589    case nir_intrinsic_interp_deref_at_offset:
590       bary_op = nir_intrinsic_load_barycentric_at_offset;
591       break;
592    default:
593       unreachable("Bogus interpolateAt() intrinsic.");
594    }
595 
596    nir_intrinsic_instr *bary_setup =
597       nir_intrinsic_instr_create(state->builder.shader, bary_op);
598 
599    nir_def_init(&bary_setup->instr, &bary_setup->def, 2, 32);
600    nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
601 
602    if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
603        intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
604        intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
605       bary_setup->src[0] = nir_src_for_ssa(intrin->src[1].ssa);
606 
607    nir_builder_instr_insert(b, &bary_setup->instr);
608 
609    nir_io_semantics semantics = { 0 };
610    semantics.location = var->data.location;
611    semantics.num_slots = get_number_of_slots(state, var);
612    semantics.medium_precision =
613       var->data.precision == GLSL_PRECISION_MEDIUM ||
614       var->data.precision == GLSL_PRECISION_LOW;
615 
616    nir_def *load =
617       nir_load_interpolated_input(&state->builder,
618                                   intrin->def.num_components,
619                                   intrin->def.bit_size,
620                                   &bary_setup->def,
621                                   offset,
622                                   .base = var->data.driver_location,
623                                   .component = component,
624                                   .io_semantics = semantics,
625                                   .dest_type = nir_type_float | intrin->def.bit_size);
626 
627    return load;
628 }
629 
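/* Per-block driver of the pass: find load/store/interp deref intrinsics on
 * the requested modes, compute their offsets and replace them with lowered
 * intrinsics.  Accesses known to be out of bounds are removed (loads are
 * replaced with zero).
 */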
630 static bool
631 nir_lower_io_block(nir_block *block,
632                    struct lower_io_state *state)
633 {
634    nir_builder *b = &state->builder;
635    const nir_shader_compiler_options *options = b->shader->options;
636    bool progress = false;
637 
638    nir_foreach_instr_safe(instr, block) {
639       if (instr->type != nir_instr_type_intrinsic)
640          continue;
641 
642       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
643 
644       switch (intrin->intrinsic) {
645       case nir_intrinsic_load_deref:
646       case nir_intrinsic_store_deref:
647          /* We can lower the I/O for this intrinsic */
648          break;
649       case nir_intrinsic_interp_deref_at_centroid:
650       case nir_intrinsic_interp_deref_at_sample:
651       case nir_intrinsic_interp_deref_at_offset:
652       case nir_intrinsic_interp_deref_at_vertex:
653          /* We can optionally lower these to load_interpolated_input */
654          if (options->use_interpolated_input_intrinsics ||
655              options->lower_interpolate_at)
656             break;
657          FALLTHROUGH;
658       default:
659          /* We can't lower the I/O for this intrinsic, so skip it */
660          continue;
661       }
662 
663       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
664       if (!nir_deref_mode_is_one_of(deref, state->modes))
665          continue;
666 
667       nir_variable *var = nir_deref_instr_get_variable(deref);
668 
669       b->cursor = nir_before_instr(instr);
670 
671       const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);
672 
673       nir_def *offset;
674       nir_def *array_index = NULL;
675       unsigned component_offset = var->data.location_frac;
676       bool bindless_type_size = var->data.mode == nir_var_shader_in ||
677                                 var->data.mode == nir_var_shader_out ||
678                                 var->data.bindless;
679 
680       if (nir_deref_instr_is_known_out_of_bounds(deref)) {
681          /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
682           *
683           *    In the subsections described above for array, vector, matrix and
684           *    structure accesses, any out-of-bounds access produced undefined
685           *    behavior....
686           *    Out-of-bounds reads return undefined values, which
687           *    include values from other variables of the active program or zero.
688           *    Out-of-bounds writes may be discarded or overwrite
689           *    other variables of the active program.
690           *
691           * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
692           * for reads.
693           *
694           * Otherwise get_io_offset would return out-of-bound offset which may
695           * result in out-of-bound loading/storing of inputs/outputs,
696           * that could cause issues in drivers down the line.
697           */
698          if (intrin->intrinsic != nir_intrinsic_store_deref) {
699             nir_def *zero =
700                nir_imm_zero(b, intrin->def.num_components,
701                             intrin->def.bit_size);
702             nir_def_rewrite_uses(&intrin->def,
703                                  zero);
704          }
705 
706          nir_instr_remove(&intrin->instr);
707          progress = true;
708          continue;
709       }
710 
711       offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
712                              state->type_size, &component_offset,
713                              bindless_type_size);
714 
715       nir_def *replacement = NULL;
716 
717       switch (intrin->intrinsic) {
718       case nir_intrinsic_load_deref:
719          replacement = lower_load(intrin, state, array_index, var, offset,
720                                   component_offset, deref->type);
721          break;
722 
723       case nir_intrinsic_store_deref:
724          lower_store(intrin, state, array_index, var, offset,
725                      component_offset, deref->type);
726          break;
727 
728       case nir_intrinsic_interp_deref_at_centroid:
729       case nir_intrinsic_interp_deref_at_sample:
730       case nir_intrinsic_interp_deref_at_offset:
731       case nir_intrinsic_interp_deref_at_vertex:
732          assert(array_index == NULL);
733          replacement = lower_interpolate_at(intrin, state, var, offset,
734                                             component_offset, deref->type);
735          break;
736 
737       default:
738          continue;
739       }
740 
741       if (replacement) {
742          nir_def_rewrite_uses(&intrin->def,
743                               replacement);
744       }
745       nir_instr_remove(&intrin->instr);
746       progress = true;
747    }
748 
749    return progress;
750 }
751 
752 static bool
753 nir_lower_io_impl(nir_function_impl *impl,
754                   nir_variable_mode modes,
755                   int (*type_size)(const struct glsl_type *, bool),
756                   nir_lower_io_options options)
757 {
758    struct lower_io_state state;
759    bool progress = false;
760 
761    state.builder = nir_builder_create(impl);
762    state.dead_ctx = ralloc_context(NULL);
763    state.modes = modes;
764    state.type_size = type_size;
765    state.options = options;
766 
767    ASSERTED nir_variable_mode supported_modes =
768       nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
769    assert(!(modes & ~supported_modes));
770 
771    nir_foreach_block(block, impl) {
772       progress |= nir_lower_io_block(block, &state);
773    }
774 
775    ralloc_free(state.dead_ctx);
776 
777    nir_metadata_preserve(impl, nir_metadata_none);
778 
779    return progress;
780 }
781 
782 /** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
783  *
784  * This pass is intended to be used for cross-stage shader I/O and driver-
785  * managed uniforms to turn deref-based access into a simpler model using
786  * locations or offsets.  For fragment shader inputs, it can optionally turn
787  * load_deref into an explicit interpolation using barycentrics coming from
788  * one of the load_barycentric_* intrinsics.  This pass requires that all
789  * deref chains are complete and contain no casts.
790  */
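/* Illustrative only (not part of this file): a driver would typically invoke
 * the pass through NIR_PASS with its own slot-counting callback, e.g.
 *
 *    NIR_PASS(progress, nir, nir_lower_io,
 *             nir_var_shader_in | nir_var_shader_out,
 *             driver_type_size_vec4, (nir_lower_io_options)0);
 *
 * where driver_type_size_vec4 is a hypothetical callback with the signature
 * int (*)(const struct glsl_type *, bool).
 */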
791 bool
792 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
793              int (*type_size)(const struct glsl_type *, bool),
794              nir_lower_io_options options)
795 {
796    bool progress = false;
797 
798    nir_foreach_function_impl(impl, shader) {
799       progress |= nir_lower_io_impl(impl, modes, type_size, options);
800    }
801 
802    return progress;
803 }
804 
805 static unsigned
806 type_scalar_size_bytes(const struct glsl_type *type)
807 {
808    assert(glsl_type_is_vector_or_scalar(type) ||
809           glsl_type_is_matrix(type));
810    return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
811 }
812 
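/* Add a single-component offset to an address while respecting the layout of
 * the given nir_address_format: plain adds for flat formats, a carry-aware
 * add for 2x32-bit globals, and updates of the offset component for the
 * index+offset and bounded formats.
 */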
813 nir_def *
814 nir_build_addr_iadd(nir_builder *b, nir_def *addr,
815                     nir_address_format addr_format,
816                     nir_variable_mode modes,
817                     nir_def *offset)
818 {
819    assert(offset->num_components == 1);
820 
821    switch (addr_format) {
822    case nir_address_format_32bit_global:
823    case nir_address_format_64bit_global:
824    case nir_address_format_32bit_offset:
825       assert(addr->bit_size == offset->bit_size);
826       assert(addr->num_components == 1);
827       return nir_iadd(b, addr, offset);
828 
829    case nir_address_format_2x32bit_global: {
830       assert(addr->num_components == 2);
831       nir_def *lo = nir_channel(b, addr, 0);
832       nir_def *hi = nir_channel(b, addr, 1);
833       nir_def *res_lo = nir_iadd(b, lo, offset);
834       nir_def *carry = nir_b2i32(b, nir_ult(b, res_lo, lo));
835       nir_def *res_hi = nir_iadd(b, hi, carry);
836       return nir_vec2(b, res_lo, res_hi);
837    }
838 
839    case nir_address_format_32bit_offset_as_64bit:
840       assert(addr->num_components == 1);
841       assert(offset->bit_size == 32);
842       return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));
843 
844    case nir_address_format_64bit_global_32bit_offset:
845    case nir_address_format_64bit_bounded_global:
846       assert(addr->num_components == 4);
847       assert(addr->bit_size == offset->bit_size);
848       return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);
849 
850    case nir_address_format_32bit_index_offset:
851       assert(addr->num_components == 2);
852       assert(addr->bit_size == offset->bit_size);
853       return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);
854 
855    case nir_address_format_32bit_index_offset_pack64:
856       assert(addr->num_components == 1);
857       assert(offset->bit_size == 32);
858       return nir_pack_64_2x32_split(b,
859                                     nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
860                                     nir_unpack_64_2x32_split_y(b, addr));
861 
862    case nir_address_format_vec2_index_32bit_offset:
863       assert(addr->num_components == 3);
864       assert(offset->bit_size == 32);
865       return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);
866 
867    case nir_address_format_62bit_generic:
868       assert(addr->num_components == 1);
869       assert(addr->bit_size == 64);
870       assert(offset->bit_size == 64);
871       if (!(modes & ~(nir_var_function_temp |
872                       nir_var_shader_temp |
873                       nir_var_mem_shared))) {
874          /* If we're sure it's one of these modes, we can do an easy 32-bit
875           * addition and don't need to bother with 64-bit math.
876           */
877          nir_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
878          nir_def *type = nir_unpack_64_2x32_split_y(b, addr);
879          addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
880          return nir_pack_64_2x32_split(b, addr32, type);
881       } else {
882          return nir_iadd(b, addr, offset);
883       }
884 
885    case nir_address_format_logical:
886       unreachable("Unsupported address format");
887    }
888    unreachable("Invalid address format");
889 }
890 
891 static unsigned
892 addr_get_offset_bit_size(nir_def *addr, nir_address_format addr_format)
893 {
894    if (addr_format == nir_address_format_32bit_offset_as_64bit ||
895        addr_format == nir_address_format_32bit_index_offset_pack64)
896       return 32;
897    return addr->bit_size;
898 }
899 
900 nir_def *
901 nir_build_addr_iadd_imm(nir_builder *b, nir_def *addr,
902                         nir_address_format addr_format,
903                         nir_variable_mode modes,
904                         int64_t offset)
905 {
906    if (!offset)
907       return addr;
908 
909    return nir_build_addr_iadd(
910       b, addr, addr_format, modes,
911       nir_imm_intN_t(b, offset,
912                      addr_get_offset_bit_size(addr, addr_format)));
913 }
914 
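/* Build the base address of a variable from its mode and driver_location:
 * either an immediate offset or a mode-specific base pointer plus the
 * variable's offset, depending on the address format.
 */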
915 static nir_def *
916 build_addr_for_var(nir_builder *b, nir_variable *var,
917                    nir_address_format addr_format)
918 {
919    assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
920                             nir_var_mem_task_payload |
921                             nir_var_mem_global |
922                             nir_var_shader_temp | nir_var_function_temp |
923                             nir_var_mem_push_const | nir_var_mem_constant));
924 
925    const unsigned num_comps = nir_address_format_num_components(addr_format);
926    const unsigned bit_size = nir_address_format_bit_size(addr_format);
927 
928    switch (addr_format) {
929    case nir_address_format_2x32bit_global:
930    case nir_address_format_32bit_global:
931    case nir_address_format_64bit_global: {
932       nir_def *base_addr;
933       switch (var->data.mode) {
934       case nir_var_shader_temp:
935          base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
936          break;
937 
938       case nir_var_function_temp:
939          base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
940          break;
941 
942       case nir_var_mem_constant:
943          base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
944          break;
945 
946       case nir_var_mem_shared:
947          base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
948          break;
949 
950       case nir_var_mem_global:
951          base_addr = nir_load_global_base_ptr(b, num_comps, bit_size);
952          break;
953 
954       default:
955          unreachable("Unsupported variable mode");
956       }
957 
958       return nir_build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
959                                      var->data.driver_location);
960    }
961 
962    case nir_address_format_32bit_offset:
963       assert(var->data.driver_location <= UINT32_MAX);
964       return nir_imm_int(b, var->data.driver_location);
965 
966    case nir_address_format_32bit_offset_as_64bit:
967       assert(var->data.driver_location <= UINT32_MAX);
968       return nir_imm_int64(b, var->data.driver_location);
969 
970    case nir_address_format_62bit_generic:
971       switch (var->data.mode) {
972       case nir_var_shader_temp:
973       case nir_var_function_temp:
974          assert(var->data.driver_location <= UINT32_MAX);
975          return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);
976 
977       case nir_var_mem_shared:
978          assert(var->data.driver_location <= UINT32_MAX);
979          return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);
980 
981       case nir_var_mem_global:
982          return nir_iadd_imm(b, nir_load_global_base_ptr(b, num_comps, bit_size),
983                              var->data.driver_location);
984 
985       default:
986          unreachable("Unsupported variable mode");
987       }
988 
989    default:
990       unreachable("Unsupported address format");
991    }
992 }
993 
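/* For formats that can encode several modes (62bit_generic keeps a 2-bit
 * mode tag in bits 63:62), emit a runtime comparison that tests whether an
 * address belongs to the given mode.
 */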
994 static nir_def *
995 build_runtime_addr_mode_check(nir_builder *b, nir_def *addr,
996                               nir_address_format addr_format,
997                               nir_variable_mode mode)
998 {
999    /* The compile-time check failed; do a run-time check */
1000    switch (addr_format) {
1001    case nir_address_format_62bit_generic: {
1002       assert(addr->num_components == 1);
1003       assert(addr->bit_size == 64);
1004       nir_def *mode_enum = nir_ushr_imm(b, addr, 62);
1005       switch (mode) {
1006       case nir_var_function_temp:
1007       case nir_var_shader_temp:
1008          return nir_ieq_imm(b, mode_enum, 0x2);
1009 
1010       case nir_var_mem_shared:
1011          return nir_ieq_imm(b, mode_enum, 0x1);
1012 
1013       case nir_var_mem_global:
1014          return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
1015                         nir_ieq_imm(b, mode_enum, 0x3));
1016 
1017       default:
1018          unreachable("Invalid mode check intrinsic");
1019       }
1020    }
1021 
1022    default:
1023       unreachable("Unsupported address mode");
1024    }
1025 }
1026 
1027 unsigned
1028 nir_address_format_bit_size(nir_address_format addr_format)
1029 {
1030    switch (addr_format) {
1031    case nir_address_format_32bit_global:
1032       return 32;
1033    case nir_address_format_2x32bit_global:
1034       return 32;
1035    case nir_address_format_64bit_global:
1036       return 64;
1037    case nir_address_format_64bit_global_32bit_offset:
1038       return 32;
1039    case nir_address_format_64bit_bounded_global:
1040       return 32;
1041    case nir_address_format_32bit_index_offset:
1042       return 32;
1043    case nir_address_format_32bit_index_offset_pack64:
1044       return 64;
1045    case nir_address_format_vec2_index_32bit_offset:
1046       return 32;
1047    case nir_address_format_62bit_generic:
1048       return 64;
1049    case nir_address_format_32bit_offset:
1050       return 32;
1051    case nir_address_format_32bit_offset_as_64bit:
1052       return 64;
1053    case nir_address_format_logical:
1054       return 32;
1055    }
1056    unreachable("Invalid address format");
1057 }
1058 
1059 unsigned
1060 nir_address_format_num_components(nir_address_format addr_format)
1061 {
1062    switch (addr_format) {
1063    case nir_address_format_32bit_global:
1064       return 1;
1065    case nir_address_format_2x32bit_global:
1066       return 2;
1067    case nir_address_format_64bit_global:
1068       return 1;
1069    case nir_address_format_64bit_global_32bit_offset:
1070       return 4;
1071    case nir_address_format_64bit_bounded_global:
1072       return 4;
1073    case nir_address_format_32bit_index_offset:
1074       return 2;
1075    case nir_address_format_32bit_index_offset_pack64:
1076       return 1;
1077    case nir_address_format_vec2_index_32bit_offset:
1078       return 3;
1079    case nir_address_format_62bit_generic:
1080       return 1;
1081    case nir_address_format_32bit_offset:
1082       return 1;
1083    case nir_address_format_32bit_offset_as_64bit:
1084       return 1;
1085    case nir_address_format_logical:
1086       return 1;
1087    }
1088    unreachable("Invalid address format");
1089 }
1090 
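/* addr_to_index and addr_to_offset split index+offset style addresses into
 * their descriptor-index and byte-offset parts.
 */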
1091 static nir_def *
1092 addr_to_index(nir_builder *b, nir_def *addr,
1093               nir_address_format addr_format)
1094 {
1095    switch (addr_format) {
1096    case nir_address_format_32bit_index_offset:
1097       assert(addr->num_components == 2);
1098       return nir_channel(b, addr, 0);
1099    case nir_address_format_32bit_index_offset_pack64:
1100       return nir_unpack_64_2x32_split_y(b, addr);
1101    case nir_address_format_vec2_index_32bit_offset:
1102       assert(addr->num_components == 3);
1103       return nir_trim_vector(b, addr, 2);
1104    default:
1105       unreachable("Invalid address format");
1106    }
1107 }
1108 
1109 static nir_def *
1110 addr_to_offset(nir_builder *b, nir_def *addr,
1111                nir_address_format addr_format)
1112 {
1113    switch (addr_format) {
1114    case nir_address_format_32bit_index_offset:
1115       assert(addr->num_components == 2);
1116       return nir_channel(b, addr, 1);
1117    case nir_address_format_32bit_index_offset_pack64:
1118       return nir_unpack_64_2x32_split_x(b, addr);
1119    case nir_address_format_vec2_index_32bit_offset:
1120       assert(addr->num_components == 3);
1121       return nir_channel(b, addr, 2);
1122    case nir_address_format_32bit_offset:
1123       return addr;
1124    case nir_address_format_32bit_offset_as_64bit:
1125    case nir_address_format_62bit_generic:
1126       return nir_u2u32(b, addr);
1127    default:
1128       unreachable("Invalid address format");
1129    }
1130 }
1131 
1132 /** Returns true if the given address format resolves to a global address */
1133 static bool
1134 addr_format_is_global(nir_address_format addr_format,
1135                       nir_variable_mode mode)
1136 {
1137    if (addr_format == nir_address_format_62bit_generic)
1138       return mode == nir_var_mem_global;
1139 
1140    return addr_format == nir_address_format_32bit_global ||
1141           addr_format == nir_address_format_2x32bit_global ||
1142           addr_format == nir_address_format_64bit_global ||
1143           addr_format == nir_address_format_64bit_global_32bit_offset ||
1144           addr_format == nir_address_format_64bit_bounded_global;
1145 }
1146 
1147 static bool
1148 addr_format_is_offset(nir_address_format addr_format,
1149                       nir_variable_mode mode)
1150 {
1151    if (addr_format == nir_address_format_62bit_generic)
1152       return mode != nir_var_mem_global;
1153 
1154    return addr_format == nir_address_format_32bit_offset ||
1155           addr_format == nir_address_format_32bit_offset_as_64bit;
1156 }
1157 
1158 static nir_def *
1159 addr_to_global(nir_builder *b, nir_def *addr,
1160                nir_address_format addr_format)
1161 {
1162    switch (addr_format) {
1163    case nir_address_format_32bit_global:
1164    case nir_address_format_64bit_global:
1165    case nir_address_format_62bit_generic:
1166       assert(addr->num_components == 1);
1167       return addr;
1168 
1169    case nir_address_format_2x32bit_global:
1170       assert(addr->num_components == 2);
1171       return addr;
1172 
1173    case nir_address_format_64bit_global_32bit_offset:
1174    case nir_address_format_64bit_bounded_global:
1175       assert(addr->num_components == 4);
1176       return nir_iadd(b, nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)),
1177                       nir_u2u64(b, nir_channel(b, addr, 3)));
1178 
1179    case nir_address_format_32bit_index_offset:
1180    case nir_address_format_32bit_index_offset_pack64:
1181    case nir_address_format_vec2_index_32bit_offset:
1182    case nir_address_format_32bit_offset:
1183    case nir_address_format_32bit_offset_as_64bit:
1184    case nir_address_format_logical:
1185       unreachable("Cannot get a 64-bit address with this address format");
1186    }
1187 
1188    unreachable("Invalid address format");
1189 }
1190 
1191 static bool
1192 addr_format_needs_bounds_check(nir_address_format addr_format)
1193 {
1194    return addr_format == nir_address_format_64bit_bounded_global;
1195 }
1196 
1197 static nir_def *
1198 addr_is_in_bounds(nir_builder *b, nir_def *addr,
1199                   nir_address_format addr_format, unsigned size)
1200 {
1201    assert(addr_format == nir_address_format_64bit_bounded_global);
1202    assert(addr->num_components == 4);
1203    assert(size > 0);
1204    return nir_ult(b, nir_iadd_imm(b, nir_channel(b, addr, 3), size - 1),
1205                   nir_channel(b, addr, 2));
1206 }
1207 
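/* Compute a conservative [base, base + range) byte interval for a deref
 * chain with explicit layout, falling back to the whole resource
 * (base 0, range ~0) when the offset cannot be bounded.
 */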
1208 static void
1209 nir_get_explicit_deref_range(nir_deref_instr *deref,
1210                              nir_address_format addr_format,
1211                              uint32_t *out_base,
1212                              uint32_t *out_range)
1213 {
1214    uint32_t base = 0;
1215    uint32_t range = glsl_get_explicit_size(deref->type, false);
1216 
1217    while (true) {
1218       nir_deref_instr *parent = nir_deref_instr_parent(deref);
1219 
1220       switch (deref->deref_type) {
1221       case nir_deref_type_array:
1222       case nir_deref_type_array_wildcard:
1223       case nir_deref_type_ptr_as_array: {
1224          const unsigned stride = nir_deref_instr_array_stride(deref);
1225          if (stride == 0)
1226             goto fail;
1227 
1228          if (!parent)
1229             goto fail;
1230 
1231          if (deref->deref_type != nir_deref_type_array_wildcard &&
1232              nir_src_is_const(deref->arr.index)) {
1233             base += stride * nir_src_as_uint(deref->arr.index);
1234          } else {
1235             if (glsl_get_length(parent->type) == 0)
1236                goto fail;
1237             range += stride * (glsl_get_length(parent->type) - 1);
1238          }
1239          break;
1240       }
1241 
1242       case nir_deref_type_struct: {
1243          if (!parent)
1244             goto fail;
1245 
1246          base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
1247          break;
1248       }
1249 
1250       case nir_deref_type_cast: {
1251          nir_instr *parent_instr = deref->parent.ssa->parent_instr;
1252 
1253          switch (parent_instr->type) {
1254          case nir_instr_type_load_const: {
1255             nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);
1256 
1257             switch (addr_format) {
1258             case nir_address_format_32bit_offset:
1259                base += load->value[1].u32;
1260                break;
1261             case nir_address_format_32bit_index_offset:
1262                base += load->value[1].u32;
1263                break;
1264             case nir_address_format_vec2_index_32bit_offset:
1265                base += load->value[2].u32;
1266                break;
1267             default:
1268                goto fail;
1269             }
1270 
1271             *out_base = base;
1272             *out_range = range;
1273             return;
1274          }
1275 
1276          case nir_instr_type_intrinsic: {
1277             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
1278             switch (intr->intrinsic) {
1279             case nir_intrinsic_load_vulkan_descriptor:
1280                /* Assume that a load_vulkan_descriptor won't contribute to an
1281                 * offset within the resource.
1282                 */
1283                break;
1284             default:
1285                goto fail;
1286             }
1287 
1288             *out_base = base;
1289             *out_range = range;
1290             return;
1291          }
1292 
1293          default:
1294             goto fail;
1295          }
1296       }
1297 
1298       default:
1299          goto fail;
1300       }
1301 
1302       deref = parent;
1303    }
1304 
1305 fail:
1306    *out_base = 0;
1307    *out_range = ~0;
1308 }
1309 
1310 static nir_variable_mode
1311 canonicalize_generic_modes(nir_variable_mode modes)
1312 {
1313    assert(modes != 0);
1314    if (util_bitcount(modes) == 1)
1315       return modes;
1316 
1317    assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
1318                       nir_var_mem_shared | nir_var_mem_global)));
1319 
1320    /* Canonicalize by converting shader_temp to function_temp */
1321    if (modes & nir_var_shader_temp) {
1322       modes &= ~nir_var_shader_temp;
1323       modes |= nir_var_function_temp;
1324    }
1325 
1326    return modes;
1327 }
1328 
1329 static nir_intrinsic_op
1330 get_store_global_op_from_addr_format(nir_address_format addr_format)
1331 {
1332    if (addr_format != nir_address_format_2x32bit_global)
1333       return nir_intrinsic_store_global;
1334    else
1335       return nir_intrinsic_store_global_2x32;
1336 }
1337 
1338 static nir_intrinsic_op
1339 get_load_global_op_from_addr_format(nir_address_format addr_format)
1340 {
1341    if (addr_format != nir_address_format_2x32bit_global)
1342       return nir_intrinsic_load_global;
1343    else
1344       return nir_intrinsic_load_global_2x32;
1345 }
1346 
1347 static nir_intrinsic_op
1348 get_load_global_constant_op_from_addr_format(nir_address_format addr_format)
1349 {
1350    if (addr_format != nir_address_format_2x32bit_global)
1351       return nir_intrinsic_load_global_constant;
1352    else
1353       return nir_intrinsic_load_global_2x32; /* no dedicated op, fallback */
1354 }
1355 
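/* Emit the lowered load for explicit I/O.  While several generic modes are
 * still possible, recurse under runtime mode checks and join the results
 * with a phi; once a single mode remains, pick the intrinsic for it.
 */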
1356 static nir_def *
1357 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
1358                        nir_def *addr, nir_address_format addr_format,
1359                        nir_variable_mode modes,
1360                        uint32_t align_mul, uint32_t align_offset,
1361                        unsigned num_components)
1362 {
1363    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1364    modes = canonicalize_generic_modes(modes);
1365 
1366    if (util_bitcount(modes) > 1) {
1367       if (addr_format_is_global(addr_format, modes)) {
1368          return build_explicit_io_load(b, intrin, addr, addr_format,
1369                                        nir_var_mem_global,
1370                                        align_mul, align_offset,
1371                                        num_components);
1372       } else if (modes & nir_var_function_temp) {
1373          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1374                                                       nir_var_function_temp));
1375          nir_def *res1 =
1376             build_explicit_io_load(b, intrin, addr, addr_format,
1377                                    nir_var_function_temp,
1378                                    align_mul, align_offset,
1379                                    num_components);
1380          nir_push_else(b, NULL);
1381          nir_def *res2 =
1382             build_explicit_io_load(b, intrin, addr, addr_format,
1383                                    modes & ~nir_var_function_temp,
1384                                    align_mul, align_offset,
1385                                    num_components);
1386          nir_pop_if(b, NULL);
1387          return nir_if_phi(b, res1, res2);
1388       } else {
1389          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1390                                                       nir_var_mem_shared));
1391          assert(modes & nir_var_mem_shared);
1392          nir_def *res1 =
1393             build_explicit_io_load(b, intrin, addr, addr_format,
1394                                    nir_var_mem_shared,
1395                                    align_mul, align_offset,
1396                                    num_components);
1397          nir_push_else(b, NULL);
1398          assert(modes & nir_var_mem_global);
1399          nir_def *res2 =
1400             build_explicit_io_load(b, intrin, addr, addr_format,
1401                                    nir_var_mem_global,
1402                                    align_mul, align_offset,
1403                                    num_components);
1404          nir_pop_if(b, NULL);
1405          return nir_if_phi(b, res1, res2);
1406       }
1407    }
1408 
1409    assert(util_bitcount(modes) == 1);
1410    const nir_variable_mode mode = modes;
1411 
1412    nir_intrinsic_op op;
1413    switch (intrin->intrinsic) {
1414    case nir_intrinsic_load_deref:
1415       switch (mode) {
1416       case nir_var_mem_ubo:
1417          if (addr_format == nir_address_format_64bit_global_32bit_offset)
1418             op = nir_intrinsic_load_global_constant_offset;
1419          else if (addr_format == nir_address_format_64bit_bounded_global)
1420             op = nir_intrinsic_load_global_constant_bounded;
1421          else if (addr_format_is_global(addr_format, mode))
1422             op = nir_intrinsic_load_global_constant;
1423          else
1424             op = nir_intrinsic_load_ubo;
1425          break;
1426       case nir_var_mem_ssbo:
1427          if (addr_format_is_global(addr_format, mode))
1428             op = nir_intrinsic_load_global;
1429          else
1430             op = nir_intrinsic_load_ssbo;
1431          break;
1432       case nir_var_mem_global:
1433          assert(addr_format_is_global(addr_format, mode));
1434          op = get_load_global_op_from_addr_format(addr_format);
1435          break;
1436       case nir_var_uniform:
1437          assert(addr_format_is_offset(addr_format, mode));
1438          assert(b->shader->info.stage == MESA_SHADER_KERNEL);
1439          op = nir_intrinsic_load_kernel_input;
1440          break;
1441       case nir_var_mem_shared:
1442          assert(addr_format_is_offset(addr_format, mode));
1443          op = nir_intrinsic_load_shared;
1444          break;
1445       case nir_var_mem_task_payload:
1446          assert(addr_format_is_offset(addr_format, mode));
1447          op = nir_intrinsic_load_task_payload;
1448          break;
1449       case nir_var_shader_temp:
1450       case nir_var_function_temp:
1451          if (addr_format_is_offset(addr_format, mode)) {
1452             op = nir_intrinsic_load_scratch;
1453          } else {
1454             assert(addr_format_is_global(addr_format, mode));
1455             op = get_load_global_op_from_addr_format(addr_format);
1456          }
1457          break;
1458       case nir_var_mem_push_const:
1459          assert(addr_format == nir_address_format_32bit_offset);
1460          op = nir_intrinsic_load_push_constant;
1461          break;
1462       case nir_var_mem_constant:
1463          if (addr_format_is_offset(addr_format, mode)) {
1464             op = nir_intrinsic_load_constant;
1465          } else {
1466             assert(addr_format_is_global(addr_format, mode));
1467             op = get_load_global_constant_op_from_addr_format(addr_format);
1468          }
1469          break;
1470       default:
1471          unreachable("Unsupported explicit IO variable mode");
1472       }
1473       break;
1474 
1475    case nir_intrinsic_load_deref_block_intel:
1476       switch (mode) {
1477       case nir_var_mem_ssbo:
1478          if (addr_format_is_global(addr_format, mode))
1479             op = nir_intrinsic_load_global_block_intel;
1480          else
1481             op = nir_intrinsic_load_ssbo_block_intel;
1482          break;
1483       case nir_var_mem_global:
1484          op = nir_intrinsic_load_global_block_intel;
1485          break;
1486       case nir_var_mem_shared:
1487          op = nir_intrinsic_load_shared_block_intel;
1488          break;
1489       default:
1490          unreachable("Unsupported explicit IO variable mode");
1491       }
1492       break;
1493 
1494    default:
1495       unreachable("Invalid intrinsic");
1496    }
1497 
1498    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
1499 
1500    if (op == nir_intrinsic_load_global_constant_offset) {
1501       assert(addr_format == nir_address_format_64bit_global_32bit_offset);
1502       load->src[0] = nir_src_for_ssa(
1503          nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
1504       load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1505    } else if (op == nir_intrinsic_load_global_constant_bounded) {
1506       assert(addr_format == nir_address_format_64bit_bounded_global);
1507       load->src[0] = nir_src_for_ssa(
1508          nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
1509       load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1510       load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
1511    } else if (addr_format_is_global(addr_format, mode)) {
1512       load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1513    } else if (addr_format_is_offset(addr_format, mode)) {
1514       assert(addr->num_components == 1);
1515       load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1516    } else {
1517       load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1518       load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1519    }
1520 
1521    if (nir_intrinsic_has_access(load))
1522       nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
1523 
1524    if (op == nir_intrinsic_load_constant) {
1525       nir_intrinsic_set_base(load, 0);
1526       nir_intrinsic_set_range(load, b->shader->constant_data_size);
1527    } else if (op == nir_intrinsic_load_kernel_input) {
1528       nir_intrinsic_set_base(load, 0);
1529       nir_intrinsic_set_range(load, b->shader->num_uniforms);
1530    } else if (mode == nir_var_mem_push_const) {
1531       /* Push-constant derefs must be chaseable back to the variable so
1532        * that we can provide a base/range.
1533        */
1534       nir_variable *var = nir_deref_instr_get_variable(deref);
1535       nir_intrinsic_set_base(load, 0);
1536       nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
1537    }
1538 
1539    unsigned bit_size = intrin->def.bit_size;
1540    if (bit_size == 1) {
1541       /* TODO: Make the native bool bit_size an option. */
1542       bit_size = 32;
1543    }
1544 
1545    if (nir_intrinsic_has_align(load))
1546       nir_intrinsic_set_align(load, align_mul, align_offset);
1547 
1548    if (nir_intrinsic_has_range_base(load)) {
1549       unsigned base, range;
1550       nir_get_explicit_deref_range(deref, addr_format, &base, &range);
1551       nir_intrinsic_set_range_base(load, base);
1552       nir_intrinsic_set_range(load, range);
1553    }
1554 
1555    load->num_components = num_components;
1556    nir_def_init(&load->instr, &load->def, num_components, bit_size);
1557 
1558    assert(bit_size % 8 == 0);
1559 
1560    nir_def *result;
1561    if (addr_format_needs_bounds_check(addr_format) &&
1562        op != nir_intrinsic_load_global_constant_bounded) {
1563       /* We don't need to bounds-check global_constant_bounded because bounds
1564        * checking is handled by the intrinsic itself.
1565        *
1566        * The Vulkan spec for robustBufferAccess gives us quite a few options
1567        * as to what we can do with an OOB read.  Unfortunately, returning
1568        * undefined values isn't one of them so we return an actual zero.
1569        */
1570       nir_def *zero = nir_imm_zero(b, load->num_components, bit_size);
1571 
1572       /* TODO: Better handle block_intel. */
1573       assert(load->num_components == 1);
1574       const unsigned load_size = bit_size / 8;
1575       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
1576 
1577       nir_builder_instr_insert(b, &load->instr);
1578 
1579       nir_pop_if(b, NULL);
1580 
1581       result = nir_if_phi(b, &load->def, zero);
1582    } else {
1583       nir_builder_instr_insert(b, &load->instr);
1584       result = &load->def;
1585    }
1586 
1587    if (intrin->def.bit_size == 1) {
1588       /* For shared, we can go ahead and use NIR's and/or the back-end's
1589        * standard encoding for booleans rather than forcing a 0/1 boolean.
1590        * This should save an instruction or two.
1591        */
1592       if (mode == nir_var_mem_shared ||
1593           mode == nir_var_shader_temp ||
1594           mode == nir_var_function_temp)
1595          result = nir_b2b1(b, result);
1596       else
1597          result = nir_i2b(b, result);
1598    }
1599 
1600    return result;
1601 }
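
/* For a generic (multi-mode) pointer with a non-global address format, the
 * recursion above effectively emits an if-ladder along the lines of
 *
 *    if (addr_mode_is(addr, nir_var_function_temp))
 *       res = load_scratch(addr_to_offset(addr));
 *    else if (addr_mode_is(addr, nir_var_mem_shared))
 *       res = load_shared(addr_to_offset(addr));
 *    else
 *       res = load_global(addr_to_global(addr));
 *
 * with phis joining the per-mode results.  This is only a sketch; the exact
 * intrinsics and address conversions depend on addr_format and the mode set.
 */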
1602 
1603 static void
1604 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
1605                         nir_def *addr, nir_address_format addr_format,
1606                         nir_variable_mode modes,
1607                         uint32_t align_mul, uint32_t align_offset,
1608                         nir_def *value, nir_component_mask_t write_mask)
1609 {
1610    modes = canonicalize_generic_modes(modes);
1611 
1612    if (util_bitcount(modes) > 1) {
1613       if (addr_format_is_global(addr_format, modes)) {
1614          build_explicit_io_store(b, intrin, addr, addr_format,
1615                                  nir_var_mem_global,
1616                                  align_mul, align_offset,
1617                                  value, write_mask);
1618       } else if (modes & nir_var_function_temp) {
1619          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1620                                                       nir_var_function_temp));
1621          build_explicit_io_store(b, intrin, addr, addr_format,
1622                                  nir_var_function_temp,
1623                                  align_mul, align_offset,
1624                                  value, write_mask);
1625          nir_push_else(b, NULL);
1626          build_explicit_io_store(b, intrin, addr, addr_format,
1627                                  modes & ~nir_var_function_temp,
1628                                  align_mul, align_offset,
1629                                  value, write_mask);
1630          nir_pop_if(b, NULL);
1631       } else {
1632          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1633                                                       nir_var_mem_shared));
1634          assert(modes & nir_var_mem_shared);
1635          build_explicit_io_store(b, intrin, addr, addr_format,
1636                                  nir_var_mem_shared,
1637                                  align_mul, align_offset,
1638                                  value, write_mask);
1639          nir_push_else(b, NULL);
1640          assert(modes & nir_var_mem_global);
1641          build_explicit_io_store(b, intrin, addr, addr_format,
1642                                  nir_var_mem_global,
1643                                  align_mul, align_offset,
1644                                  value, write_mask);
1645          nir_pop_if(b, NULL);
1646       }
1647       return;
1648    }
1649 
1650    assert(util_bitcount(modes) == 1);
1651    const nir_variable_mode mode = modes;
1652 
1653    nir_intrinsic_op op;
1654    switch (intrin->intrinsic) {
1655    case nir_intrinsic_store_deref:
1656       assert(write_mask != 0);
1657 
1658       switch (mode) {
1659       case nir_var_mem_ssbo:
1660          if (addr_format_is_global(addr_format, mode))
1661             op = get_store_global_op_from_addr_format(addr_format);
1662          else
1663             op = nir_intrinsic_store_ssbo;
1664          break;
1665       case nir_var_mem_global:
1666          assert(addr_format_is_global(addr_format, mode));
1667          op = get_store_global_op_from_addr_format(addr_format);
1668          break;
1669       case nir_var_mem_shared:
1670          assert(addr_format_is_offset(addr_format, mode));
1671          op = nir_intrinsic_store_shared;
1672          break;
1673       case nir_var_mem_task_payload:
1674          assert(addr_format_is_offset(addr_format, mode));
1675          op = nir_intrinsic_store_task_payload;
1676          break;
1677       case nir_var_shader_temp:
1678       case nir_var_function_temp:
1679          if (addr_format_is_offset(addr_format, mode)) {
1680             op = nir_intrinsic_store_scratch;
1681          } else {
1682             assert(addr_format_is_global(addr_format, mode));
1683             op = get_store_global_op_from_addr_format(addr_format);
1684          }
1685          break;
1686       default:
1687          unreachable("Unsupported explicit IO variable mode");
1688       }
1689       break;
1690 
1691    case nir_intrinsic_store_deref_block_intel:
1692       assert(write_mask == 0);
1693 
1694       switch (mode) {
1695       case nir_var_mem_ssbo:
1696          if (addr_format_is_global(addr_format, mode))
1697             op = nir_intrinsic_store_global_block_intel;
1698          else
1699             op = nir_intrinsic_store_ssbo_block_intel;
1700          break;
1701       case nir_var_mem_global:
1702          op = nir_intrinsic_store_global_block_intel;
1703          break;
1704       case nir_var_mem_shared:
1705          op = nir_intrinsic_store_shared_block_intel;
1706          break;
1707       default:
1708          unreachable("Unsupported explicit IO variable mode");
1709       }
1710       break;
1711 
1712    default:
1713       unreachable("Invalid intrinsic");
1714    }
1715 
1716    nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
1717 
1718    if (value->bit_size == 1) {
1719       /* For shared, we can go ahead and use NIR's and/or the back-end's
1720        * standard encoding for booleans rather than forcing a 0/1 boolean.
1721        * This should save an instruction or two.
1722        *
1723        * TODO: Make the native bool bit_size an option.
1724        */
1725       if (mode == nir_var_mem_shared ||
1726           mode == nir_var_shader_temp ||
1727           mode == nir_var_function_temp)
1728          value = nir_b2b32(b, value);
1729       else
1730          value = nir_b2iN(b, value, 32);
1731    }
1732 
1733    store->src[0] = nir_src_for_ssa(value);
1734    if (addr_format_is_global(addr_format, mode)) {
1735       store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1736    } else if (addr_format_is_offset(addr_format, mode)) {
1737       assert(addr->num_components == 1);
1738       store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1739    } else {
1740       store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1741       store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1742    }
1743 
1744    nir_intrinsic_set_write_mask(store, write_mask);
1745 
1746    if (nir_intrinsic_has_access(store))
1747       nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1748 
1749    nir_intrinsic_set_align(store, align_mul, align_offset);
1750 
1751    assert(value->num_components == 1 ||
1752           value->num_components == intrin->num_components);
1753    store->num_components = value->num_components;
1754 
1755    assert(value->bit_size % 8 == 0);
1756 
1757    if (addr_format_needs_bounds_check(addr_format)) {
1758       /* TODO: Better handle block_intel. */
1759       assert(store->num_components == 1);
1760       const unsigned store_size = value->bit_size / 8;
1761       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1762 
1763       nir_builder_instr_insert(b, &store->instr);
1764 
1765       nir_pop_if(b, NULL);
1766    } else {
1767       nir_builder_instr_insert(b, &store->instr);
1768    }
1769 }
1770 
1771 static nir_def *
1772 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1773                          nir_def *addr, nir_address_format addr_format,
1774                          nir_variable_mode modes)
1775 {
1776    modes = canonicalize_generic_modes(modes);
1777 
1778    if (util_bitcount(modes) > 1) {
1779       if (addr_format_is_global(addr_format, modes)) {
1780          return build_explicit_io_atomic(b, intrin, addr, addr_format,
1781                                          nir_var_mem_global);
1782       } else if (modes & nir_var_function_temp) {
1783          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1784                                                       nir_var_function_temp));
1785          nir_def *res1 =
1786             build_explicit_io_atomic(b, intrin, addr, addr_format,
1787                                      nir_var_function_temp);
1788          nir_push_else(b, NULL);
1789          nir_def *res2 =
1790             build_explicit_io_atomic(b, intrin, addr, addr_format,
1791                                      modes & ~nir_var_function_temp);
1792          nir_pop_if(b, NULL);
1793          return nir_if_phi(b, res1, res2);
1794       } else {
1795          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1796                                                       nir_var_mem_shared));
1797          assert(modes & nir_var_mem_shared);
1798          nir_def *res1 =
1799             build_explicit_io_atomic(b, intrin, addr, addr_format,
1800                                      nir_var_mem_shared);
1801          nir_push_else(b, NULL);
1802          assert(modes & nir_var_mem_global);
1803          nir_def *res2 =
1804             build_explicit_io_atomic(b, intrin, addr, addr_format,
1805                                      nir_var_mem_global);
1806          nir_pop_if(b, NULL);
1807          return nir_if_phi(b, res1, res2);
1808       }
1809    }
1810 
1811    assert(util_bitcount(modes) == 1);
1812    const nir_variable_mode mode = modes;
1813 
1814    const unsigned num_data_srcs =
1815       nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1816 
1817    nir_intrinsic_op op;
1818    switch (mode) {
1819    case nir_var_mem_ssbo:
1820       if (addr_format_is_global(addr_format, mode))
1821          op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1822       else
1823          op = ssbo_atomic_for_deref(intrin->intrinsic);
1824       break;
1825    case nir_var_mem_global:
1826       assert(addr_format_is_global(addr_format, mode));
1827       op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1828       break;
1829    case nir_var_mem_shared:
1830       assert(addr_format_is_offset(addr_format, mode));
1831       op = shared_atomic_for_deref(intrin->intrinsic);
1832       break;
1833    case nir_var_mem_task_payload:
1834       assert(addr_format_is_offset(addr_format, mode));
1835       op = task_payload_atomic_for_deref(intrin->intrinsic);
1836       break;
1837    default:
1838       unreachable("Unsupported explicit IO variable mode");
1839    }
1840 
1841    nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1842    nir_intrinsic_set_atomic_op(atomic, nir_intrinsic_atomic_op(intrin));
1843 
1844    unsigned src = 0;
1845    if (addr_format_is_global(addr_format, mode)) {
1846       atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1847    } else if (addr_format_is_offset(addr_format, mode)) {
1848       assert(addr->num_components == 1);
1849       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1850    } else {
1851       atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1852       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1853    }
1854    for (unsigned i = 0; i < num_data_srcs; i++) {
1855       atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1856    }
1857 
1858    /* Global atomics don't have access flags because they assume that the
1859     * address may be non-uniform.
1860     */
1861    if (nir_intrinsic_has_access(atomic))
1862       nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1863 
1864    assert(intrin->def.num_components == 1);
1865    nir_def_init(&atomic->instr, &atomic->def, 1,
1866                 intrin->def.bit_size);
1867 
1868    assert(atomic->def.bit_size % 8 == 0);
1869 
1870    if (addr_format_needs_bounds_check(addr_format)) {
1871       const unsigned atomic_size = atomic->def.bit_size / 8;
1872       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1873 
1874       nir_builder_instr_insert(b, &atomic->instr);
1875 
1876       nir_pop_if(b, NULL);
1877       return nir_if_phi(b, &atomic->def,
1878                         nir_undef(b, 1, atomic->def.bit_size));
1879    } else {
1880       nir_builder_instr_insert(b, &atomic->instr);
1881       return &atomic->def;
1882    }
1883 }
1884 
1885 nir_def *
1886 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1887                                    nir_def *base_addr,
1888                                    nir_address_format addr_format)
1889 {
1890    switch (deref->deref_type) {
1891    case nir_deref_type_var:
1892       return build_addr_for_var(b, deref->var, addr_format);
1893 
1894    case nir_deref_type_ptr_as_array:
1895    case nir_deref_type_array: {
1896       unsigned stride = nir_deref_instr_array_stride(deref);
1897       assert(stride > 0);
1898 
1899       unsigned offset_bit_size = addr_get_offset_bit_size(base_addr, addr_format);
1900       nir_def *index = deref->arr.index.ssa;
1901       nir_def *offset;
1902 
1903       /* If the access chain has been declared in-bounds, then we know it doesn't
1904        * overflow the type.  For nir_deref_type_array, this implies it cannot be
1905        * negative. Also, since types in NIR have a maximum 32-bit size, we know the
1906        * final result will fit in a 32-bit value so we can convert the index to
1907        * 32-bit before multiplying and save ourselves from a 64-bit multiply.
1908        */
1909       if (deref->arr.in_bounds && deref->deref_type == nir_deref_type_array) {
1910          index = nir_u2u32(b, index);
1911          offset = nir_u2uN(b, nir_amul_imm(b, index, stride), offset_bit_size);
1912       } else {
1913          index = nir_i2iN(b, index, offset_bit_size);
1914          offset = nir_amul_imm(b, index, stride);
1915       }
1916 
1917       return nir_build_addr_iadd(b, base_addr, addr_format,
1918                                  deref->modes, offset);
1919    }
1920 
1921    case nir_deref_type_array_wildcard:
1922       unreachable("Wildcards should be lowered by now");
1923       break;
1924 
1925    case nir_deref_type_struct: {
1926       nir_deref_instr *parent = nir_deref_instr_parent(deref);
1927       int offset = glsl_get_struct_field_offset(parent->type,
1928                                                 deref->strct.index);
1929       assert(offset >= 0);
1930       return nir_build_addr_iadd_imm(b, base_addr, addr_format,
1931                                      deref->modes, offset);
1932    }
1933 
1934    case nir_deref_type_cast:
1935       /* Nothing to do here */
1936       return base_addr;
1937    }
1938 
1939    unreachable("Invalid NIR deref type");
1940 }
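
/* Worked example: with an explicitly laid out SSBO and an index+offset
 * address format, a chain like ssbo_var[i].member lowers roughly to
 *
 *    addr = build_addr_for_var(ssbo_var)          (nir_deref_type_var)
 *    addr = addr + i * array_stride               (nir_deref_type_array)
 *    addr = addr + struct_field_offset(member)    (nir_deref_type_struct)
 *
 * where the stride and field offset come from the explicit type layout.
 */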
1941 
1942 void
1943 nir_lower_explicit_io_instr(nir_builder *b,
1944                             nir_intrinsic_instr *intrin,
1945                             nir_def *addr,
1946                             nir_address_format addr_format)
1947 {
1948    b->cursor = nir_after_instr(&intrin->instr);
1949 
1950    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1951    unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1952    unsigned scalar_size = type_scalar_size_bytes(deref->type);
1953    if (vec_stride == 0) {
1954       vec_stride = scalar_size;
1955    } else {
1956       assert(glsl_type_is_vector(deref->type));
1957       assert(vec_stride >= scalar_size);
1958    }
1959 
1960    uint32_t align_mul, align_offset;
1961    if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
1962       /* If we don't have an alignment from the deref, assume scalar */
1963       align_mul = scalar_size;
1964       align_offset = 0;
1965    }
1966 
1967    /* In order for bounds checking to be correct as per the Vulkan spec,
1968     * we need to check at the individual component granularity.  Prior to
1969     * robustness2, we're technically allowed to be sloppy by 16B.  Even with
1970     * robustness2, UBO loads are allowed to have a granularity as high as 256B
1971     * depending on hardware limits.  However, we have none of that information
1972     * here.  Short of adding new address formats, the easiest way to do that
1973     * is to just split any loads and stores into individual components here.
1974     *
1975     * TODO: At some point in the future we may want to add more ops similar to
1976     * nir_intrinsic_load_global_constant_bounded and make bounds checking the
1977     * back-end's problem.  Another option would be to somehow plumb more of
1978     * that information through to nir_lower_explicit_io.  For now, however,
1979     * scalarizing is at least correct.
1980     */
1981    bool scalarize = vec_stride > scalar_size ||
1982                     addr_format_needs_bounds_check(addr_format);
1983 
1984    switch (intrin->intrinsic) {
1985    case nir_intrinsic_load_deref: {
1986       nir_def *value;
1987       if (scalarize) {
1988          nir_def *comps[NIR_MAX_VEC_COMPONENTS] = {
1989             NULL,
1990          };
1991          for (unsigned i = 0; i < intrin->num_components; i++) {
1992             unsigned comp_offset = i * vec_stride;
1993             nir_def *comp_addr = nir_build_addr_iadd_imm(b, addr, addr_format,
1994                                                          deref->modes,
1995                                                          comp_offset);
1996             comps[i] = build_explicit_io_load(b, intrin, comp_addr,
1997                                               addr_format, deref->modes,
1998                                               align_mul,
1999                                               (align_offset + comp_offset) %
2000                                                  align_mul,
2001                                               1);
2002          }
2003          value = nir_vec(b, comps, intrin->num_components);
2004       } else {
2005          value = build_explicit_io_load(b, intrin, addr, addr_format,
2006                                         deref->modes, align_mul, align_offset,
2007                                         intrin->num_components);
2008       }
2009       nir_def_rewrite_uses(&intrin->def, value);
2010       break;
2011    }
2012 
2013    case nir_intrinsic_store_deref: {
2014       nir_def *value = intrin->src[1].ssa;
2015       nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
2016       if (scalarize) {
2017          for (unsigned i = 0; i < intrin->num_components; i++) {
2018             if (!(write_mask & (1 << i)))
2019                continue;
2020 
2021             unsigned comp_offset = i * vec_stride;
2022             nir_def *comp_addr = nir_build_addr_iadd_imm(b, addr, addr_format,
2023                                                          deref->modes,
2024                                                          comp_offset);
2025             build_explicit_io_store(b, intrin, comp_addr, addr_format,
2026                                     deref->modes, align_mul,
2027                                     (align_offset + comp_offset) % align_mul,
2028                                     nir_channel(b, value, i), 1);
2029          }
2030       } else {
2031          build_explicit_io_store(b, intrin, addr, addr_format,
2032                                  deref->modes, align_mul, align_offset,
2033                                  value, write_mask);
2034       }
2035       break;
2036    }
2037 
2038    case nir_intrinsic_load_deref_block_intel: {
2039       nir_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
2040                                               deref->modes,
2041                                               align_mul, align_offset,
2042                                               intrin->num_components);
2043       nir_def_rewrite_uses(&intrin->def, value);
2044       break;
2045    }
2046 
2047    case nir_intrinsic_store_deref_block_intel: {
2048       nir_def *value = intrin->src[1].ssa;
2049       const nir_component_mask_t write_mask = 0;
2050       build_explicit_io_store(b, intrin, addr, addr_format,
2051                               deref->modes, align_mul, align_offset,
2052                               value, write_mask);
2053       break;
2054    }
2055 
2056    default: {
2057       nir_def *value =
2058          build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
2059       nir_def_rewrite_uses(&intrin->def, value);
2060       break;
2061    }
2062    }
2063 
2064    nir_instr_remove(&intrin->instr);
2065 }
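
/* E.g. a load_deref of a vec4 through a bounds-checked address format is
 * split into four single-component loads at byte offsets 0, vec_stride,
 * 2 * vec_stride and 3 * vec_stride and then recombined with nir_vec(),
 * which is what makes per-component robustness checks possible.
 */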
2066 
2067 bool
2068 nir_get_explicit_deref_align(nir_deref_instr *deref,
2069                              bool default_to_type_align,
2070                              uint32_t *align_mul,
2071                              uint32_t *align_offset)
2072 {
2073    if (deref->deref_type == nir_deref_type_var) {
2074       /* If we see a variable, align_mul is effectively infinite because we
2075        * know the offset exactly (up to the offset of the base pointer for the
2076        * given variable mode).   We have to pick something so we choose 256B
2077        * as an arbitrary alignment which seems high enough for any reasonable
2078        * wide-load use-case.  Back-ends should clamp alignments down if 256B
2079        * is too large for some reason.
2080        */
2081       *align_mul = 256;
2082       *align_offset = deref->var->data.driver_location % 256;
2083       return true;
2084    }
2085 
2086    /* If we're a cast deref that has an alignment, use that. */
2087    if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
2088       *align_mul = deref->cast.align_mul;
2089       *align_offset = deref->cast.align_offset;
2090       return true;
2091    }
2092 
2093    /* Otherwise, we need to compute the alignment based on the parent */
2094    nir_deref_instr *parent = nir_deref_instr_parent(deref);
2095    if (parent == NULL) {
2096       assert(deref->deref_type == nir_deref_type_cast);
2097       if (default_to_type_align) {
2098          /* If we don't have a parent, assume the type's alignment, if any. */
2099          unsigned type_align = glsl_get_explicit_alignment(deref->type);
2100          if (type_align == 0)
2101             return false;
2102 
2103          *align_mul = type_align;
2104          *align_offset = 0;
2105          return true;
2106       } else {
2107          return false;
2108       }
2109    }
2110 
2111    uint32_t parent_mul, parent_offset;
2112    if (!nir_get_explicit_deref_align(parent, default_to_type_align,
2113                                      &parent_mul, &parent_offset))
2114       return false;
2115 
2116    switch (deref->deref_type) {
2117    case nir_deref_type_var:
2118       unreachable("Handled above");
2119 
2120    case nir_deref_type_array:
2121    case nir_deref_type_array_wildcard:
2122    case nir_deref_type_ptr_as_array: {
2123       const unsigned stride = nir_deref_instr_array_stride(deref);
2124       if (stride == 0)
2125          return false;
2126 
2127       if (deref->deref_type != nir_deref_type_array_wildcard &&
2128           nir_src_is_const(deref->arr.index)) {
2129          unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
2130          *align_mul = parent_mul;
2131          *align_offset = (parent_offset + offset) % parent_mul;
2132       } else {
2133          /* If this is a wildcard or an indirect deref, we have to go with the
2134           * power-of-two gcd.
2135           */
2136          *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
2137          *align_offset = parent_offset % *align_mul;
2138       }
2139       return true;
2140    }
2141 
2142    case nir_deref_type_struct: {
2143       const int offset = glsl_get_struct_field_offset(parent->type,
2144                                                       deref->strct.index);
2145       if (offset < 0)
2146          return false;
2147 
2148       *align_mul = parent_mul;
2149       *align_offset = (parent_offset + offset) % parent_mul;
2150       return true;
2151    }
2152 
2153    case nir_deref_type_cast:
2154       /* We handled the explicit alignment case above. */
2155       assert(deref->cast.align_mul == 0);
2156       *align_mul = parent_mul;
2157       *align_offset = parent_offset;
2158       return true;
2159    }
2160 
2161    unreachable("Invalid deref_instr_type");
2162 }
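
/* Worked example: a cast annotated with align_mul = 16, align_offset = 0
 * followed by a struct member at byte offset 20 yields
 *
 *    align_mul = 16, align_offset = 20 % 16 = 4
 *
 * and an indirect array deref with a 12-byte stride on top of that clamps
 * the alignment to the largest power of two dividing the stride:
 *
 *    align_mul = MIN2(16, 1 << (ffs(12) - 1)) = 4, align_offset = 4 % 4 = 0.
 */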
2163 
2164 static void
2165 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
2166                         nir_address_format addr_format)
2167 {
2168    /* Ignore samplers/textures; they are handled by other passes such as
2169     * nir_lower_samplers.  Only skip uniform-mode derefs here, otherwise this
2170     * would break GL bindless texture handles stored in UBOs.
2171     */
2172    if (nir_deref_mode_is_in_set(deref, nir_var_uniform) &&
2173        (glsl_type_is_sampler(deref->type) ||
2174         glsl_type_is_texture(deref->type)))
2175       return;
2176 
2177    /* Just delete the deref if it's not used.  We can't use
2178     * nir_deref_instr_remove_if_unused here because it may remove more than
2179     * one deref which could break our list walking since we walk the list
2180     * backwards.
2181     */
2182    if (nir_def_is_unused(&deref->def)) {
2183       nir_instr_remove(&deref->instr);
2184       return;
2185    }
2186 
2187    b->cursor = nir_after_instr(&deref->instr);
2188 
2189    nir_def *base_addr = NULL;
2190    if (deref->deref_type != nir_deref_type_var) {
2191       base_addr = deref->parent.ssa;
2192    }
2193 
2194    nir_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
2195                                                       addr_format);
2196    assert(addr->bit_size == deref->def.bit_size);
2197    assert(addr->num_components == deref->def.num_components);
2198 
2199    nir_instr_remove(&deref->instr);
2200    nir_def_rewrite_uses(&deref->def, addr);
2201 }
2202 
2203 static void
2204 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
2205                          nir_address_format addr_format)
2206 {
2207    nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
2208 }
2209 
2210 static void
2211 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
2212                                nir_address_format addr_format)
2213 {
2214    b->cursor = nir_after_instr(&intrin->instr);
2215 
2216    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2217 
2218    assert(glsl_type_is_array(deref->type));
2219    assert(glsl_get_length(deref->type) == 0);
2220    assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
2221    unsigned stride = glsl_get_explicit_stride(deref->type);
2222    assert(stride > 0);
2223 
2224    nir_def *addr = &deref->def;
2225 
2226    nir_def *offset, *size;
2227    switch (addr_format) {
2228    case nir_address_format_64bit_global_32bit_offset:
2229    case nir_address_format_64bit_bounded_global:
2230       offset = nir_channel(b, addr, 3);
2231       size = nir_channel(b, addr, 2);
2232       break;
2233 
2234    case nir_address_format_32bit_index_offset:
2235    case nir_address_format_32bit_index_offset_pack64:
2236    case nir_address_format_vec2_index_32bit_offset: {
2237       offset = addr_to_offset(b, addr, addr_format);
2238       nir_def *index = addr_to_index(b, addr, addr_format);
2239       unsigned access = nir_intrinsic_access(intrin);
2240       size = nir_get_ssbo_size(b, index, .access = access);
2241       break;
2242    }
2243 
2244    default:
2245       unreachable("Cannot determine SSBO size");
2246    }
2247 
2248    nir_def *remaining = nir_usub_sat(b, size, offset);
2249    nir_def *arr_size = nir_udiv_imm(b, remaining, stride);
2250 
2251    nir_def_rewrite_uses(&intrin->def, arr_size);
2252    nir_instr_remove(&intrin->instr);
2253 }
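
/* For example, an SSBO whose bound size is 100 bytes with a trailing
 * unsized array that starts at offset 16 and has a 12-byte stride gives
 *
 *    remaining = usub_sat(100, 16) = 84
 *    arr_size  = 84 / 12 = 7 elements,
 *
 * which matches the .length() rules for runtime arrays.
 */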
2254 
2255 static void
2256 lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
2257                              nir_address_format addr_format)
2258 {
2259    if (addr_format_is_global(addr_format, 0)) {
2260       /* If the address format is always global, then the driver can use
2261        * global addresses regardless of the mode.  In that case, don't create
2262        * a check, just whack the intrinsic to addr_mode_is and delegate to the
2263        * driver lowering.
2264        */
2265       intrin->intrinsic = nir_intrinsic_addr_mode_is;
2266       return;
2267    }
2268 
2269    nir_def *addr = intrin->src[0].ssa;
2270 
2271    b->cursor = nir_instr_remove(&intrin->instr);
2272 
2273    nir_def *is_mode =
2274       build_runtime_addr_mode_check(b, addr, addr_format,
2275                                     nir_intrinsic_memory_modes(intrin));
2276 
2277    nir_def_rewrite_uses(&intrin->def, is_mode);
2278 }
2279 
2280 static bool
2281 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
2282                            nir_address_format addr_format)
2283 {
2284    bool progress = false;
2285 
2286    nir_builder b = nir_builder_create(impl);
2287 
2288    /* Walk in reverse order so that we can see the full deref chain when we
2289     * lower the access operations.  We lower them assuming that the derefs
2290     * will be turned into address calculations later.
2291     */
2292    nir_foreach_block_reverse(block, impl) {
2293       nir_foreach_instr_reverse_safe(instr, block) {
2294          switch (instr->type) {
2295          case nir_instr_type_deref: {
2296             nir_deref_instr *deref = nir_instr_as_deref(instr);
2297             if (nir_deref_mode_is_in_set(deref, modes)) {
2298                lower_explicit_io_deref(&b, deref, addr_format);
2299                progress = true;
2300             }
2301             break;
2302          }
2303 
2304          case nir_instr_type_intrinsic: {
2305             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2306             switch (intrin->intrinsic) {
2307             case nir_intrinsic_load_deref:
2308             case nir_intrinsic_store_deref:
2309             case nir_intrinsic_load_deref_block_intel:
2310             case nir_intrinsic_store_deref_block_intel:
2311             case nir_intrinsic_deref_atomic:
2312             case nir_intrinsic_deref_atomic_swap: {
2313                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2314                if (nir_deref_mode_is_in_set(deref, modes)) {
2315                   lower_explicit_io_access(&b, intrin, addr_format);
2316                   progress = true;
2317                }
2318                break;
2319             }
2320 
2321             case nir_intrinsic_deref_buffer_array_length: {
2322                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2323                if (nir_deref_mode_is_in_set(deref, modes)) {
2324                   lower_explicit_io_array_length(&b, intrin, addr_format);
2325                   progress = true;
2326                }
2327                break;
2328             }
2329 
2330             case nir_intrinsic_deref_mode_is: {
2331                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2332                if (nir_deref_mode_is_in_set(deref, modes)) {
2333                   lower_explicit_io_mode_check(&b, intrin, addr_format);
2334                   progress = true;
2335                }
2336                break;
2337             }
2338 
2339             case nir_intrinsic_launch_mesh_workgroups_with_payload_deref: {
2340                if (modes & nir_var_mem_task_payload) {
2341                   /* Get address and size of the payload variable. */
2342                   nir_deref_instr *deref = nir_src_as_deref(intrin->src[1]);
2343                   assert(deref->deref_type == nir_deref_type_var);
2344                   unsigned base = deref->var->data.explicit_location;
2345                   unsigned size = glsl_get_explicit_size(deref->var->type, false);
2346 
2347                   /* Replace the current instruction with the explicit intrinsic. */
2348                   nir_def *dispatch_3d = intrin->src[0].ssa;
2349                   b.cursor = nir_instr_remove(instr);
2350                   nir_launch_mesh_workgroups(&b, dispatch_3d, .base = base, .range = size);
2351                   progress = true;
2352                }
2353 
2354                break;
2355             }
2356 
2357             default:
2358                break;
2359             }
2360             break;
2361          }
2362 
2363          default:
2364             /* Nothing to do */
2365             break;
2366          }
2367       }
2368    }
2369 
2370    if (progress) {
2371       nir_metadata_preserve(impl, nir_metadata_none);
2372    } else {
2373       nir_metadata_preserve(impl, nir_metadata_all);
2374    }
2375 
2376    return progress;
2377 }
2378 
2379 /** Lower explicitly laid out I/O access to byte offset/address intrinsics
2380  *
2381  * This pass is intended to be used for any I/O which touches memory external
2382  * to the shader or which is directly visible to the client.  It requires that
2383  * all data types in the given modes have explicit stride/offset decorations
2384  * to tell it exactly how to calculate the offset/address for the given load,
2385  * store, or atomic operation.  If the offset/stride information does not come
2386  * from the client explicitly (as with shared variables in GL or Vulkan),
2387  * nir_lower_vars_to_explicit_types() can be used to add them.
2388  *
2389  * Unlike nir_lower_io, this pass is fully capable of handling incomplete
2390  * pointer chains which may contain cast derefs.  It does so by walking the
2391  * deref chain backwards and simply replacing each deref, one at a time, with
2392  * the appropriate address calculation.  The pass takes a nir_address_format
2393  * parameter which describes how the offset or address is to be represented
2394  * during calculations.  By ensuring that the address is always in a
2395  * consistent format, pointers can safely be conjured from thin air by the
2396  * driver, stored to variables, passed through phis, etc.
2397  *
2398  * The one exception to the simple algorithm described above is for handling
2399  * row-major matrices in which case we may look down one additional level of
2400  * the deref chain.
2401  *
2402  * This pass is also capable of handling OpenCL generic pointers.  If the
2403  * address format is global, it will lower any ambiguous (more than one mode)
2404  * access to global and pass through the deref_mode_is run-time checks as
2405  * addr_mode_is.  This assumes the driver has somehow mapped shared and
2406  * scratch memory to the global address space.  For other modes such as
2407  * 62bit_generic, there is an enum embedded in the address and we lower
2408  * ambiguous access to an if-ladder and deref_mode_is to a check against the
2409  * embedded enum.  If nir_lower_explicit_io is called on any shader that
2410  * contains generic pointers, it must either be used on all of the generic
2411  * modes or none.
2412  */
2413 bool
2414 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
2415                       nir_address_format addr_format)
2416 {
2417    bool progress = false;
2418 
2419    nir_foreach_function_impl(impl, shader) {
2420       if (impl && nir_lower_explicit_io_impl(impl, modes, addr_format))
2421          progress = true;
2422    }
2423 
2424    return progress;
2425 }
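
/* A minimal usage sketch, assuming a Vulkan-style back-end that consumes
 * 32-bit descriptor-index + byte-offset buffer addresses; the helper name is
 * hypothetical and nothing in this pass calls it.
 */
static UNUSED bool
example_lower_buffer_io(nir_shader *nir)
{
   /* UBO/SSBO derefs become load_ubo/load_ssbo/store_ssbo intrinsics whose
    * index and offset sources are computed from the deref chain.
    */
   return nir_lower_explicit_io(nir, nir_var_mem_ubo | nir_var_mem_ssbo,
                                nir_address_format_32bit_index_offset);
}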
2426 
2427 static bool
2428 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
2429                                       nir_variable_mode modes,
2430                                       glsl_type_size_align_func type_info)
2431 {
2432    bool progress = false;
2433 
2434    nir_foreach_block(block, impl) {
2435       nir_foreach_instr(instr, block) {
2436          if (instr->type != nir_instr_type_deref)
2437             continue;
2438 
2439          nir_deref_instr *deref = nir_instr_as_deref(instr);
2440          if (!nir_deref_mode_is_in_set(deref, modes))
2441             continue;
2442 
2443          unsigned size, alignment;
2444          const struct glsl_type *new_type =
2445             glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
2446          if (new_type != deref->type) {
2447             progress = true;
2448             deref->type = new_type;
2449          }
2450          if (deref->deref_type == nir_deref_type_cast) {
2451             /* See also glsl_type::get_explicit_type_for_size_align() */
2452             unsigned new_stride = align(size, alignment);
2453             if (new_stride != deref->cast.ptr_stride) {
2454                deref->cast.ptr_stride = new_stride;
2455                progress = true;
2456             }
2457          }
2458       }
2459    }
2460 
2461    if (progress) {
2462       nir_metadata_preserve(impl, nir_metadata_block_index |
2463                                      nir_metadata_dominance |
2464                                      nir_metadata_live_defs |
2465                                      nir_metadata_loop_analysis);
2466    } else {
2467       nir_metadata_preserve(impl, nir_metadata_all);
2468    }
2469 
2470    return progress;
2471 }
2472 
2473 static bool
2474 lower_vars_to_explicit(nir_shader *shader,
2475                        struct exec_list *vars, nir_variable_mode mode,
2476                        glsl_type_size_align_func type_info)
2477 {
2478    bool progress = false;
2479    unsigned offset;
2480    switch (mode) {
2481    case nir_var_uniform:
2482       assert(shader->info.stage == MESA_SHADER_KERNEL);
2483       offset = 0;
2484       break;
2485    case nir_var_function_temp:
2486    case nir_var_shader_temp:
2487       offset = shader->scratch_size;
2488       break;
2489    case nir_var_mem_shared:
2490       offset = shader->info.shared_size;
2491       break;
2492    case nir_var_mem_task_payload:
2493       offset = shader->info.task_payload_size;
2494       break;
2495    case nir_var_mem_node_payload:
2496       assert(!shader->info.cs.node_payloads_size);
2497       offset = 0;
2498       break;
2499    case nir_var_mem_global:
2500       offset = shader->global_mem_size;
2501       break;
2502    case nir_var_mem_constant:
2503       offset = shader->constant_data_size;
2504       break;
2505    case nir_var_shader_call_data:
2506    case nir_var_ray_hit_attrib:
2507    case nir_var_mem_node_payload_in:
2508       offset = 0;
2509       break;
2510    default:
2511       unreachable("Unsupported mode");
2512    }
2513    nir_foreach_variable_in_list(var, vars) {
2514       if (var->data.mode != mode)
2515          continue;
2516 
2517       unsigned size, alignment;
2518       const struct glsl_type *explicit_type =
2519          glsl_get_explicit_type_for_size_align(var->type, type_info,
2520                                                &size, &alignment);
2521 
2522       if (explicit_type != var->type)
2523          var->type = explicit_type;
2524 
2525       UNUSED bool is_empty_struct =
2526          glsl_type_is_struct_or_ifc(explicit_type) &&
2527          glsl_get_length(explicit_type) == 0;
2528 
2529       assert(util_is_power_of_two_nonzero(alignment) || is_empty_struct ||
2530              glsl_type_is_cmat(glsl_without_array(explicit_type)));
2531       assert(util_is_power_of_two_or_zero(var->data.alignment));
2532       alignment = MAX2(alignment, var->data.alignment);
2533 
2534       var->data.driver_location = ALIGN_POT(offset, alignment);
2535       offset = var->data.driver_location + size;
2536       progress = true;
2537    }
2538 
2539    switch (mode) {
2540    case nir_var_uniform:
2541       assert(shader->info.stage == MESA_SHADER_KERNEL);
2542       shader->num_uniforms = offset;
2543       break;
2544    case nir_var_shader_temp:
2545    case nir_var_function_temp:
2546       shader->scratch_size = offset;
2547       break;
2548    case nir_var_mem_shared:
2549       shader->info.shared_size = offset;
2550       break;
2551    case nir_var_mem_task_payload:
2552       shader->info.task_payload_size = offset;
2553       break;
2554    case nir_var_mem_node_payload:
2555       shader->info.cs.node_payloads_size = offset;
2556       break;
2557    case nir_var_mem_global:
2558       shader->global_mem_size = offset;
2559       break;
2560    case nir_var_mem_constant:
2561       shader->constant_data_size = offset;
2562       break;
2563    case nir_var_shader_call_data:
2564    case nir_var_ray_hit_attrib:
2565    case nir_var_mem_node_payload_in:
2566       break;
2567    default:
2568       unreachable("Unsupported mode");
2569    }
2570 
2571    return progress;
2572 }
2573 
2574 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
2575  * generic pointers, it must either be used on all of the generic modes or
2576  * none.
2577  */
2578 bool
2579 nir_lower_vars_to_explicit_types(nir_shader *shader,
2580                                  nir_variable_mode modes,
2581                                  glsl_type_size_align_func type_info)
2582 {
2583    /* TODO: Situations which need to be handled to support more modes:
2584     * - row-major matrices
2585     * - compact shader inputs/outputs
2586     * - interface types
2587     */
2588    ASSERTED nir_variable_mode supported =
2589       nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
2590       nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
2591       nir_var_shader_call_data | nir_var_ray_hit_attrib |
2592       nir_var_mem_task_payload | nir_var_mem_node_payload |
2593       nir_var_mem_node_payload_in;
2594    assert(!(modes & ~supported) && "unsupported");
2595 
2596    bool progress = false;
2597 
2598    if (modes & nir_var_uniform)
2599       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
2600    if (modes & nir_var_mem_global)
2601       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);
2602 
2603    if (modes & nir_var_mem_shared) {
2604       assert(!shader->info.shared_memory_explicit_layout);
2605       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
2606    }
2607 
2608    if (modes & nir_var_shader_temp)
2609       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
2610    if (modes & nir_var_mem_constant)
2611       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
2612    if (modes & nir_var_shader_call_data)
2613       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
2614    if (modes & nir_var_ray_hit_attrib)
2615       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);
2616    if (modes & nir_var_mem_task_payload)
2617       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_task_payload, type_info);
2618    if (modes & nir_var_mem_node_payload)
2619       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload, type_info);
2620    if (modes & nir_var_mem_node_payload_in)
2621       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload_in, type_info);
2622 
2623    nir_foreach_function_impl(impl, shader) {
2624       if (modes & nir_var_function_temp)
2625          progress |= lower_vars_to_explicit(shader, &impl->locals, nir_var_function_temp, type_info);
2626 
2627       progress |= nir_lower_vars_to_explicit_types_impl(impl, modes, type_info);
2628    }
2629 
2630    return progress;
2631 }
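
/* A minimal pairing sketch, assuming the driver wants C-like ("natural")
 * layout for shared memory and offset-based shared intrinsics; the helper
 * name is hypothetical, and glsl_get_natural_size_align_bytes is just one
 * possible glsl_type_size_align_func.
 */
static UNUSED bool
example_lower_shared_memory(nir_shader *nir)
{
   /* First assign every shared variable an explicit size/alignment and a
    * driver_location, then turn the derefs into load_shared/store_shared
    * with byte offsets.
    */
   bool progress = nir_lower_vars_to_explicit_types(nir, nir_var_mem_shared,
                                                    glsl_get_natural_size_align_bytes);
   progress |= nir_lower_explicit_io(nir, nir_var_mem_shared,
                                     nir_address_format_32bit_offset);
   return progress;
}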
2632 
2633 static void
2634 write_constant(void *dst, size_t dst_size,
2635                const nir_constant *c, const struct glsl_type *type)
2636 {
2637    if (c->is_null_constant) {
2638       memset(dst, 0, dst_size);
2639       return;
2640    }
2641 
2642    if (glsl_type_is_vector_or_scalar(type)) {
2643       const unsigned num_components = glsl_get_vector_elements(type);
2644       const unsigned bit_size = glsl_get_bit_size(type);
2645       if (bit_size == 1) {
2646          /* Booleans are special-cased to be 32-bit
2647           *
2648           * TODO: Make the native bool bit_size an option.
2649           */
2650          assert(num_components * 4 <= dst_size);
2651          for (unsigned i = 0; i < num_components; i++) {
2652             int32_t b32 = -(int)c->values[i].b;
2653             memcpy((char *)dst + i * 4, &b32, 4);
2654          }
2655       } else {
2656          assert(bit_size >= 8 && bit_size % 8 == 0);
2657          const unsigned byte_size = bit_size / 8;
2658          assert(num_components * byte_size <= dst_size);
2659          for (unsigned i = 0; i < num_components; i++) {
2660             /* Annoyingly, thanks to packed structs, we can't make any
2661              * assumptions about the alignment of dst.  To avoid any strange
2662              * issues with unaligned writes, we always use memcpy.
2663              */
2664             memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
2665          }
2666       }
2667    } else if (glsl_type_is_array_or_matrix(type)) {
2668       const unsigned array_len = glsl_get_length(type);
2669       const unsigned stride = glsl_get_explicit_stride(type);
2670       assert(stride > 0);
2671       const struct glsl_type *elem_type = glsl_get_array_element(type);
2672       for (unsigned i = 0; i < array_len; i++) {
2673          unsigned elem_offset = i * stride;
2674          assert(elem_offset < dst_size);
2675          write_constant((char *)dst + elem_offset, dst_size - elem_offset,
2676                         c->elements[i], elem_type);
2677       }
2678    } else {
2679       assert(glsl_type_is_struct_or_ifc(type));
2680       const unsigned num_fields = glsl_get_length(type);
2681       for (unsigned i = 0; i < num_fields; i++) {
2682          const int field_offset = glsl_get_struct_field_offset(type, i);
2683          assert(field_offset >= 0 && field_offset < dst_size);
2684          const struct glsl_type *field_type = glsl_get_struct_field(type, i);
2685          write_constant((char *)dst + field_offset, dst_size - field_offset,
2686                         c->elements[i], field_type);
2687       }
2688    }
2689 }
2690 
2691 void
2692 nir_gather_explicit_io_initializers(nir_shader *shader,
2693                                     void *dst, size_t dst_size,
2694                                     nir_variable_mode mode)
2695 {
2696    /* It doesn't really make sense to gather initializers for more than one
2697     * mode at a time.  If this ever becomes well-defined, we can drop the
2698     * assert then.
2699     */
2700    assert(util_bitcount(mode) == 1);
2701 
2702    nir_foreach_variable_with_modes(var, shader, mode) {
2703       assert(var->data.driver_location < dst_size);
2704       write_constant((char *)dst + var->data.driver_location,
2705                      dst_size - var->data.driver_location,
2706                      var->constant_initializer, var->type);
2707    }
2708 }
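
/* Usage sketch (assumes explicit layouts and var->data.driver_location have
 * already been assigned, e.g. by nir_lower_vars_to_explicit_types, and that
 * "buf_size" is a driver-computed size covering all variables of the mode):
 *
 *    void *buf = calloc(1, buf_size);
 *    nir_gather_explicit_io_initializers(shader, buf, buf_size,
 *                                        nir_var_mem_constant);
 */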
2709 
2710 /**
2711  * Return the offset source number for a load/store intrinsic or -1 if there's no offset.
2712  */
2713 int
2714 nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
2715 {
2716    switch (instr->intrinsic) {
2717    case nir_intrinsic_load_input:
2718    case nir_intrinsic_load_output:
2719    case nir_intrinsic_load_shared:
2720    case nir_intrinsic_load_task_payload:
2721    case nir_intrinsic_load_uniform:
2722    case nir_intrinsic_load_kernel_input:
2723    case nir_intrinsic_load_global:
2724    case nir_intrinsic_load_global_2x32:
2725    case nir_intrinsic_load_global_constant:
2726    case nir_intrinsic_load_scratch:
2727    case nir_intrinsic_load_fs_input_interp_deltas:
2728    case nir_intrinsic_shared_atomic:
2729    case nir_intrinsic_shared_atomic_swap:
2730    case nir_intrinsic_task_payload_atomic:
2731    case nir_intrinsic_task_payload_atomic_swap:
2732    case nir_intrinsic_global_atomic:
2733    case nir_intrinsic_global_atomic_swap:
2734       return 0;
2735    case nir_intrinsic_load_ubo:
2736    case nir_intrinsic_load_ssbo:
2737    case nir_intrinsic_load_input_vertex:
2738    case nir_intrinsic_load_per_vertex_input:
2739    case nir_intrinsic_load_per_vertex_output:
2740    case nir_intrinsic_load_per_primitive_output:
2741    case nir_intrinsic_load_interpolated_input:
2742    case nir_intrinsic_store_output:
2743    case nir_intrinsic_store_shared:
2744    case nir_intrinsic_store_task_payload:
2745    case nir_intrinsic_store_global:
2746    case nir_intrinsic_store_global_2x32:
2747    case nir_intrinsic_store_scratch:
2748    case nir_intrinsic_ssbo_atomic:
2749    case nir_intrinsic_ssbo_atomic_swap:
2750       return 1;
2751    case nir_intrinsic_store_ssbo:
2752    case nir_intrinsic_store_per_vertex_output:
2753    case nir_intrinsic_store_per_primitive_output:
2754       return 2;
2755    default:
2756       return -1;
2757    }
2758 }
2759 
2760 /**
2761  * Return the offset source for a load/store intrinsic.
2762  */
2763 nir_src *
2764 nir_get_io_offset_src(nir_intrinsic_instr *instr)
2765 {
2766    const int idx = nir_get_io_offset_src_number(instr);
2767    return idx >= 0 ? &instr->src[idx] : NULL;
2768 }
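
/* Usage sketch: passes that need the constant part of an IO offset typically
 * do something like
 *
 *    nir_src *offset = nir_get_io_offset_src(intrin);
 *    if (offset && nir_src_is_const(*offset))
 *       base += nir_src_as_uint(*offset);
 *
 * (see add_const_offset_to_base_block below for a real use).
 */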
2769 
2770 /**
2771  * Return the vertex index source number for a load/store per_vertex intrinsic, or -1 if there is none.
2772  */
2773 int
2774 nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr)
2775 {
2776    switch (instr->intrinsic) {
2777    case nir_intrinsic_load_per_vertex_input:
2778    case nir_intrinsic_load_per_vertex_output:
2779    case nir_intrinsic_load_per_primitive_output:
2780       return 0;
2781    case nir_intrinsic_store_per_vertex_output:
2782    case nir_intrinsic_store_per_primitive_output:
2783       return 1;
2784    default:
2785       return -1;
2786    }
2787 }
2788 
2789 /**
2790  * Return the vertex index source for a load/store per_vertex intrinsic.
2791  */
2792 nir_src *
2793 nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr)
2794 {
2795    const int idx = nir_get_io_arrayed_index_src_number(instr);
2796    return idx >= 0 ? &instr->src[idx] : NULL;
2797 }
2798 
2799 /**
2800  * Return the numeric constant that identifies a NULL pointer for the given
2801  * address format.
2802  */
2803 const nir_const_value *
2804 nir_address_format_null_value(nir_address_format addr_format)
2805 {
2806    static const nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
2807       [nir_address_format_32bit_global] = { { 0 } },
2808       [nir_address_format_2x32bit_global] = { { 0 } },
2809       [nir_address_format_64bit_global] = { { 0 } },
2810       [nir_address_format_64bit_global_32bit_offset] = { { 0 } },
2811       [nir_address_format_64bit_bounded_global] = { { 0 } },
2812       [nir_address_format_32bit_index_offset] = { { .u32 = ~0 }, { .u32 = ~0 } },
2813       [nir_address_format_32bit_index_offset_pack64] = { { .u64 = ~0ull } },
2814       [nir_address_format_vec2_index_32bit_offset] = { { .u32 = ~0 }, { .u32 = ~0 }, { .u32 = ~0 } },
2815       [nir_address_format_32bit_offset] = { { .u32 = ~0 } },
2816       [nir_address_format_32bit_offset_as_64bit] = { { .u64 = ~0ull } },
2817       [nir_address_format_62bit_generic] = { { .u64 = 0 } },
2818       [nir_address_format_logical] = { { .u32 = ~0 } },
2819    };
2820 
2821    assert(addr_format < ARRAY_SIZE(null_values));
2822    return null_values[addr_format];
2823 }
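
/* Sketch of how a caller can materialize this as an SSA constant, assuming
 * the usual helpers for querying an address format's shape:
 *
 *    nir_def *null_addr =
 *       nir_build_imm(b, nir_address_format_num_components(addr_format),
 *                     nir_address_format_bit_size(addr_format),
 *                     nir_address_format_null_value(addr_format));
 */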
2824 
2825 nir_def *
2826 nir_build_addr_ieq(nir_builder *b, nir_def *addr0, nir_def *addr1,
2827                    nir_address_format addr_format)
2828 {
2829    switch (addr_format) {
2830    case nir_address_format_32bit_global:
2831    case nir_address_format_2x32bit_global:
2832    case nir_address_format_64bit_global:
2833    case nir_address_format_64bit_bounded_global:
2834    case nir_address_format_32bit_index_offset:
2835    case nir_address_format_vec2_index_32bit_offset:
2836    case nir_address_format_32bit_offset:
2837    case nir_address_format_62bit_generic:
2838       return nir_ball_iequal(b, addr0, addr1);
2839 
2840    case nir_address_format_64bit_global_32bit_offset:
2841       return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
2842                              nir_channels(b, addr1, 0xb));
2843 
2844    case nir_address_format_32bit_offset_as_64bit:
2845       assert(addr0->num_components == 1 && addr1->num_components == 1);
2846       return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
2847 
2848    case nir_address_format_32bit_index_offset_pack64:
2849       assert(addr0->num_components == 1 && addr1->num_components == 1);
2850       return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
2851 
2852    case nir_address_format_logical:
2853       unreachable("Unsupported address format");
2854    }
2855 
2856    unreachable("Invalid address format");
2857 }
2858 
2859 nir_def *
2860 nir_build_addr_isub(nir_builder *b, nir_def *addr0, nir_def *addr1,
2861                     nir_address_format addr_format)
2862 {
2863    switch (addr_format) {
2864    case nir_address_format_32bit_global:
2865    case nir_address_format_64bit_global:
2866    case nir_address_format_32bit_offset:
2867    case nir_address_format_32bit_index_offset_pack64:
2868    case nir_address_format_62bit_generic:
2869       assert(addr0->num_components == 1);
2870       assert(addr1->num_components == 1);
2871       return nir_isub(b, addr0, addr1);
2872 
2873    case nir_address_format_2x32bit_global:
2874       return nir_isub(b, addr_to_global(b, addr0, addr_format),
2875                       addr_to_global(b, addr1, addr_format));
2876 
2877    case nir_address_format_32bit_offset_as_64bit:
2878       assert(addr0->num_components == 1);
2879       assert(addr1->num_components == 1);
2880       return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
2881 
2882    case nir_address_format_64bit_global_32bit_offset:
2883    case nir_address_format_64bit_bounded_global:
2884       return nir_isub(b, addr_to_global(b, addr0, addr_format),
2885                       addr_to_global(b, addr1, addr_format));
2886 
2887    case nir_address_format_32bit_index_offset:
2888       assert(addr0->num_components == 2);
2889       assert(addr1->num_components == 2);
2890       /* Assume the same buffer index. */
2891       return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
2892 
2893    case nir_address_format_vec2_index_32bit_offset:
2894       assert(addr0->num_components == 3);
2895       assert(addr1->num_components == 3);
2896       /* Assume the same buffer index. */
2897       return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
2898 
2899    case nir_address_format_logical:
2900       unreachable("Unsupported address format");
2901    }
2902 
2903    unreachable("Invalid address format");
2904 }
2905 
2906 static bool
2907 is_input(nir_intrinsic_instr *intrin)
2908 {
2909    return intrin->intrinsic == nir_intrinsic_load_input ||
2910           intrin->intrinsic == nir_intrinsic_load_input_vertex ||
2911           intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
2912           intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
2913           intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
2914 }
2915 
2916 static bool
2917 is_output(nir_intrinsic_instr *intrin)
2918 {
2919    return intrin->intrinsic == nir_intrinsic_load_output ||
2920           intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
2921           intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
2922           intrin->intrinsic == nir_intrinsic_store_output ||
2923           intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2924           intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
2925 }
2926 
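/* Return true if the accessed value occupies two vec4 slots, i.e. it is
 * 64-bit and has three or more components.
 */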
2927 static bool
2928 is_dual_slot(nir_intrinsic_instr *intrin)
2929 {
2930    if (intrin->intrinsic == nir_intrinsic_store_output ||
2931        intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2932        intrin->intrinsic == nir_intrinsic_store_per_primitive_output) {
2933       return nir_src_bit_size(intrin->src[0]) == 64 &&
2934              nir_src_num_components(intrin->src[0]) >= 3;
2935    }
2936 
2937    return intrin->def.bit_size == 64 &&
2938           intrin->def.num_components >= 3;
2939 }
2940 
2941 /**
2942  * This pass adds constant offsets to instr->const_index[0] for input/output
2943  * intrinsics, and resets the offset source to 0.  Non-constant offsets remain
2944  * unchanged - since we don't know what part of a compound variable is
2945  * accessed, we allocate storage for the entire thing. For drivers that use
2946  * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
2947  * the offset source will be 0, so that they don't have to add it in manually.
2948  */
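/* For example, conceptually: a store_output with base=16 and a constant
 * offset source of 2 becomes base=18 with an offset source of 0, and the
 * io_semantics location is advanced by the same constant.
 */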
2949 
2950 static bool
2951 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
2952                                nir_variable_mode modes)
2953 {
2954    bool progress = false;
2955    nir_foreach_instr_safe(instr, block) {
2956       if (instr->type != nir_instr_type_intrinsic)
2957          continue;
2958 
2959       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2960 
2961       if (((modes & nir_var_shader_in) && is_input(intrin)) ||
2962           ((modes & nir_var_shader_out) && is_output(intrin))) {
2963          nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
2964 
2965          /* NV_mesh_shader: ignore MS primitive indices. */
2966          if (b->shader->info.stage == MESA_SHADER_MESH &&
2967              sem.location == VARYING_SLOT_PRIMITIVE_INDICES &&
2968              !(b->shader->info.per_primitive_outputs &
2969                BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES)))
2970             continue;
2971 
2972          nir_src *offset = nir_get_io_offset_src(intrin);
2973 
2974          /* TODO: Better handling of per-view variables here */
2975          if (nir_src_is_const(*offset) &&
2976              !nir_intrinsic_io_semantics(intrin).per_view) {
2977             unsigned off = nir_src_as_uint(*offset);
2978 
2979             nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
2980 
2981             sem.location += off;
2982             /* non-indirect indexing should reduce num_slots */
2983             sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
2984             nir_intrinsic_set_io_semantics(intrin, sem);
2985 
2986             b->cursor = nir_before_instr(&intrin->instr);
2987             nir_src_rewrite(offset, nir_imm_int(b, 0));
2988             progress = true;
2989          }
2990       }
2991    }
2992 
2993    return progress;
2994 }
2995 
2996 bool
2997 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
2998 {
2999    bool progress = false;
3000 
3001    nir_foreach_function_impl(impl, nir) {
3002       bool impl_progress = false;
3003       nir_builder b = nir_builder_create(impl);
3004       nir_foreach_block(block, impl) {
3005          impl_progress |= add_const_offset_to_base_block(block, &b, modes);
3006       }
3007       progress |= impl_progress;
3008       if (impl_progress)
3009          nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
3010       else
3011          nir_metadata_preserve(impl, nir_metadata_all);
3012    }
3013 
3014    return progress;
3015 }
3016 
3017 bool
3018 nir_lower_color_inputs(nir_shader *nir)
3019 {
3020    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3021    bool progress = false;
3022 
3023    nir_builder b = nir_builder_create(impl);
3024 
3025    nir_foreach_block(block, impl) {
3026       nir_foreach_instr_safe(instr, block) {
3027          if (instr->type != nir_instr_type_intrinsic)
3028             continue;
3029 
3030          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3031 
3032          if (intrin->intrinsic != nir_intrinsic_load_input &&
3033              intrin->intrinsic != nir_intrinsic_load_interpolated_input)
3034             continue;
3035 
3036          nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
3037 
3038          if (sem.location != VARYING_SLOT_COL0 &&
3039              sem.location != VARYING_SLOT_COL1)
3040             continue;
3041 
3042          /* Default to FLAT (for load_input) */
3043          enum glsl_interp_mode interp = INTERP_MODE_FLAT;
3044          bool sample = false;
3045          bool centroid = false;
3046 
3047          if (intrin->intrinsic == nir_intrinsic_load_interpolated_input) {
3048             nir_intrinsic_instr *baryc =
3049                nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr);
3050 
3051             centroid =
3052                baryc->intrinsic == nir_intrinsic_load_barycentric_centroid;
3053             sample =
3054                baryc->intrinsic == nir_intrinsic_load_barycentric_sample;
3055             assert(centroid || sample ||
3056                    baryc->intrinsic == nir_intrinsic_load_barycentric_pixel);
3057 
3058             interp = nir_intrinsic_interp_mode(baryc);
3059          }
3060 
3061          b.cursor = nir_before_instr(instr);
3062          nir_def *load = NULL;
3063 
3064          if (sem.location == VARYING_SLOT_COL0) {
3065             load = nir_load_color0(&b);
3066             nir->info.fs.color0_interp = interp;
3067             nir->info.fs.color0_sample = sample;
3068             nir->info.fs.color0_centroid = centroid;
3069          } else {
3070             assert(sem.location == VARYING_SLOT_COL1);
3071             load = nir_load_color1(&b);
3072             nir->info.fs.color1_interp = interp;
3073             nir->info.fs.color1_sample = sample;
3074             nir->info.fs.color1_centroid = centroid;
3075          }
3076 
3077          if (intrin->num_components != 4) {
3078             unsigned start = nir_intrinsic_component(intrin);
3079             unsigned count = intrin->num_components;
3080             load = nir_channels(&b, load, BITFIELD_RANGE(start, count));
3081          }
3082 
3083          nir_def_rewrite_uses(&intrin->def, load);
3084          nir_instr_remove(instr);
3085          progress = true;
3086       }
3087    }
3088 
3089    if (progress) {
3090       nir_metadata_preserve(impl, nir_metadata_dominance |
3091                                      nir_metadata_block_index);
3092    } else {
3093       nir_metadata_preserve(impl, nir_metadata_all);
3094    }
3095    return progress;
3096 }
3097 
3098 bool
3099 nir_io_add_intrinsic_xfb_info(nir_shader *nir)
3100 {
3101    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3102    bool progress = false;
3103 
3104    for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++)
3105       nir->info.xfb_stride[i] = nir->xfb_info->buffers[i].stride / 4;
3106 
3107    nir_foreach_block(block, impl) {
3108       nir_foreach_instr_safe(instr, block) {
3109          if (instr->type != nir_instr_type_intrinsic)
3110             continue;
3111 
3112          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3113 
3114          if (!nir_intrinsic_has_io_xfb(intr))
3115             continue;
3116 
3117          /* No indirect indexing allowed. The index is implied to be 0. */
3118          ASSERTED nir_src offset = *nir_get_io_offset_src(intr);
3119          assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
3120 
3121          /* Calling this pass for the second time shouldn't do anything. */
3122          if (nir_intrinsic_io_xfb(intr).out[0].num_components ||
3123              nir_intrinsic_io_xfb(intr).out[1].num_components ||
3124              nir_intrinsic_io_xfb2(intr).out[0].num_components ||
3125              nir_intrinsic_io_xfb2(intr).out[1].num_components)
3126             continue;
3127 
3128          nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3129          unsigned writemask = nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr);
3130 
3131          nir_io_xfb xfb[2];
3132          memset(xfb, 0, sizeof(xfb));
3133 
3134          for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
3135             nir_xfb_output_info *out = &nir->xfb_info->outputs[i];
3136             if (out->location == sem.location) {
3137                unsigned xfb_mask = writemask & out->component_mask;
3138 
3139                /*fprintf(stdout, "output%u: buffer=%u, offset=%u, location=%u, "
3140                            "component_offset=%u, component_mask=0x%x, xfb_mask=0x%x, slots=%u\n",
3141                        i, out->buffer,
3142                        out->offset,
3143                        out->location,
3144                        out->component_offset,
3145                        out->component_mask,
3146                        xfb_mask, sem.num_slots);*/
3147 
3148                while (xfb_mask) {
3149                   int start, count;
3150                   u_bit_scan_consecutive_range(&xfb_mask, &start, &count);
3151 
3152                   xfb[start / 2].out[start % 2].num_components = count;
3153                   xfb[start / 2].out[start % 2].buffer = out->buffer;
3154                   /* out->offset is relative to the first stored xfb component */
3155                   /* start is relative to component 0 */
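                  /* E.g. offset = 12 bytes, component_offset = 1, start = 2
                   * yields a dword offset of 12 / 4 - 1 + 2 = 4.
                   */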
3156                   xfb[start / 2].out[start % 2].offset =
3157                      out->offset / 4 - out->component_offset + start;
3158 
3159                   progress = true;
3160                }
3161             }
3162          }
3163 
3164          nir_intrinsic_set_io_xfb(intr, xfb[0]);
3165          nir_intrinsic_set_io_xfb2(intr, xfb[1]);
3166       }
3167    }
3168 
3169    nir_metadata_preserve(impl, nir_metadata_all);
3170    return progress;
3171 }
3172 
3173 static int
3174 type_size_vec4(const struct glsl_type *type, bool bindless)
3175 {
3176    return glsl_count_attribute_slots(type, false);
3177 }
3178 
3179 /**
3180  * This runs all compiler passes needed to lower IO, lower indirect IO access,
3181  * set transform feedback info in IO intrinsics, and clean up the IR.
3182  *
3183  * \param renumber_vs_inputs
3184  *    Set to true if holes between VS inputs should be removed, which is safe
3185  *    to do in any shader linker that can handle that. Set to false if you want
3186  *    to keep holes between VS inputs, which is recommended to do in gallium
3187  *    drivers so as not to break the mapping of vertex elements to VS inputs
3188  *    expected by gallium frontends.
3189  */
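/* Usage sketch: a gallium driver that must preserve the vertex-element
 * mapping would call
 *
 *    nir_lower_io_passes(nir, false);
 *
 * while a linker that is free to compact VS inputs passes true.
 */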
3190 void
3191 nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
3192 {
3193    if (nir->info.stage == MESA_SHADER_COMPUTE)
3194       return;
3195 
3196    bool has_indirect_inputs =
3197       (nir->options->support_indirect_inputs >> nir->info.stage) & 0x1;
3198 
3199    /* Transform feedback requires that indirect outputs are lowered. */
3200    bool has_indirect_outputs =
3201       (nir->options->support_indirect_outputs >> nir->info.stage) & 0x1 &&
3202       nir->xfb_info == NULL;
3203 
3204    /* TODO: Sorting variables by location is required due to some bug
3205     * in nir_lower_io_to_temporaries. If variables are not sorted,
3206     * dEQP-GLES31.functional.separate_shader.random.0 fails.
3207     *
3208     * This isn't needed if nir_assign_io_var_locations is called because it
3209     * also sorts variables. However, if IO is lowered sooner than that, we
3210     * must sort explicitly here to get what nir_assign_io_var_locations does.
3211     */
3212    unsigned varying_var_mask =
3213       (nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0) |
3214       (nir->info.stage != MESA_SHADER_FRAGMENT ? nir_var_shader_out : 0);
3215    nir_sort_variables_by_location(nir, varying_var_mask);
3216 
3217    if (!has_indirect_inputs || !has_indirect_outputs) {
3218       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
3219                  nir_shader_get_entrypoint(nir), !has_indirect_outputs,
3220                  !has_indirect_inputs);
3221 
3222       /* We need to lower all the copy_deref's introduced by
3223        * nir_lower_io_to_temporaries before calling nir_lower_io.
3224        */
3225       NIR_PASS_V(nir, nir_split_var_copies);
3226       NIR_PASS_V(nir, nir_lower_var_copies);
3227       NIR_PASS_V(nir, nir_lower_global_vars_to_local);
3228    }
3229 
3230    NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
3231               type_size_vec4, nir_lower_io_lower_64bit_to_32);
3232 
3233    /* nir_io_add_const_offset_to_base needs actual constants. */
3234    NIR_PASS_V(nir, nir_opt_constant_folding);
3235    NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
3236 
3237    /* Lower and remove dead derefs and variables to clean up the IR. */
3238    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3239    NIR_PASS_V(nir, nir_opt_dce);
3240    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3241 
3242    /* If IO is lowered before var->data.driver_location is assigned, driver
3243     * locations are all 0, which means IO bases are all 0. It's not necessary
3244     * to set driver_location before lowering IO because the only thing that
3245     * identifies outputs is their semantic, and IO bases can always be
3246     * computed from the semantics.
3247     *
3248     * This assigns IO bases from scratch, using IO semantics to tell which
3249     * intrinsics refer to the same IO. If the bases already exist, they
3250     * will be reassigned, sorted by the semantic, and all holes removed.
3251     * This kind of canonicalizes all bases.
3252     *
3253     * This must be done after DCE to remove dead load_input intrinsics.
3254     */
3255    NIR_PASS_V(nir, nir_recompute_io_bases,
3256               (nir->info.stage != MESA_SHADER_VERTEX || renumber_vs_inputs ?
3257                nir_var_shader_in : 0) | nir_var_shader_out);
3258 
3259    if (nir->xfb_info)
3260       NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
3261 
3262    if (nir->options->lower_mediump_io)
3263       nir->options->lower_mediump_io(nir);
3264 
3265    nir->info.io_lowered = true;
3266 }
3267