/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/u_math.h"
#include "util/set.h"

struct lower_sysval_state {
   const nir_lower_compute_system_values_options *options;

   /* List of intrinsics that have already been lowered and shouldn't be
    * lowered again.
    */
   struct set *lower_once_list;
};

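/* If a system-value load has a non-32-bit destination, rewrite it to produce
 * a 32-bit value and emit an unsigned conversion back to the original bit
 * size.  Returns NULL if the destination is already 32-bit.
 */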
static nir_ssa_def *
sanitize_32bit_sysval(nir_builder *b, nir_intrinsic_instr *intrin)
{
   assert(intrin->dest.is_ssa);
   const unsigned bit_size = intrin->dest.ssa.bit_size;
   if (bit_size == 32)
      return NULL;

   intrin->dest.ssa.bit_size = 32;
   return nir_u2u(b, &intrin->dest.ssa, bit_size);
}

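/* Build the total dispatch size per dimension: the workgroup size multiplied
 * by the number of workgroups, at the requested bit size.
 */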
static nir_ssa_def *
build_global_group_size(nir_builder *b, unsigned bit_size)
{
   nir_ssa_def *group_size = nir_load_workgroup_size(b);
   nir_ssa_def *num_workgroups = nir_load_num_workgroups(b, bit_size);
   return nir_imul(b, nir_u2u(b, group_size, bit_size),
                      num_workgroups);
}

static bool
lower_system_value_filter(const nir_instr *instr, const void *_state)
{
   return instr->type == nir_instr_type_intrinsic;
}

static nir_ssa_def *
lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state)
{
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   /* All the intrinsics we care about are loads */
   if (!nir_intrinsic_infos[intrin->intrinsic].has_dest)
      return NULL;

   assert(intrin->dest.is_ssa);
   const unsigned bit_size = intrin->dest.ssa.bit_size;

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_vertex_id:
      if (b->shader->options->vertex_id_zero_based) {
         return nir_iadd(b, nir_load_vertex_id_zero_base(b),
                            nir_load_first_vertex(b));
      } else {
         return NULL;
      }

   case nir_intrinsic_load_base_vertex:
      /**
       * From the OpenGL 4.6 (11.1.3.9 Shader Inputs) specification:
       *
       * "gl_BaseVertex holds the integer value passed to the baseVertex
       * parameter to the command that resulted in the current shader
       * invocation. In the case where the command has no baseVertex
       * parameter, the value of gl_BaseVertex is zero."
       */
      if (b->shader->options->lower_base_vertex) {
         return nir_iand(b, nir_load_is_indexed_draw(b),
                            nir_load_first_vertex(b));
      } else {
         return NULL;
      }

   case nir_intrinsic_load_helper_invocation:
      if (b->shader->options->lower_helper_invocation) {
         nir_ssa_def *tmp;
         tmp = nir_ishl(b, nir_imm_int(b, 1),
                           nir_load_sample_id_no_per_sample(b));
         tmp = nir_iand(b, nir_load_sample_mask_in(b), tmp);
         return nir_inot(b, nir_i2b(b, tmp));
      } else {
         return NULL;
      }

   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_workgroup_size:
      return sanitize_32bit_sysval(b, intrin);

   case nir_intrinsic_load_deref: {
      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      if (!nir_deref_mode_is(deref, nir_var_system_value))
         return NULL;

      nir_ssa_def *column = NULL;
      if (deref->deref_type != nir_deref_type_var) {
         /* The only system values that aren't plain variables are
          * gl_SampleMask, which is always an array of one element, and a
          * couple of ray-tracing intrinsics, which are matrices.
          */
         assert(deref->deref_type == nir_deref_type_array);
         assert(deref->arr.index.is_ssa);
         column = deref->arr.index.ssa;
         deref = nir_deref_instr_parent(deref);
         assert(deref->deref_type == nir_deref_type_var);
         assert(deref->var->data.location == SYSTEM_VALUE_SAMPLE_MASK_IN ||
                deref->var->data.location == SYSTEM_VALUE_RAY_OBJECT_TO_WORLD ||
                deref->var->data.location == SYSTEM_VALUE_RAY_WORLD_TO_OBJECT ||
                deref->var->data.location == SYSTEM_VALUE_MESH_VIEW_INDICES);
      }
      nir_variable *var = deref->var;

      switch (var->data.location) {
      case SYSTEM_VALUE_INSTANCE_INDEX:
         return nir_iadd(b, nir_load_instance_id(b),
                            nir_load_base_instance(b));

      case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      case SYSTEM_VALUE_SUBGROUP_LT_MASK: {
         nir_intrinsic_op op =
            nir_intrinsic_from_system_value(var->data.location);
         nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
         nir_ssa_dest_init_for_type(&load->instr, &load->dest,
                                    var->type, NULL);
         load->num_components = load->dest.ssa.num_components;
         nir_builder_instr_insert(b, &load->instr);
         return &load->dest.ssa;
      }

      case SYSTEM_VALUE_DEVICE_INDEX:
         if (b->shader->options->lower_device_index_to_zero)
            return nir_imm_int(b, 0);
         break;

      case SYSTEM_VALUE_GLOBAL_GROUP_SIZE:
         return build_global_group_size(b, bit_size);

      case SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL:
         return nir_load_barycentric(b, nir_intrinsic_load_barycentric_pixel,
                                     INTERP_MODE_NOPERSPECTIVE);

      case SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID:
         return nir_load_barycentric(b, nir_intrinsic_load_barycentric_centroid,
                                     INTERP_MODE_NOPERSPECTIVE);

      case SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE:
         return nir_load_barycentric(b, nir_intrinsic_load_barycentric_sample,
                                     INTERP_MODE_NOPERSPECTIVE);

      case SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL:
         return nir_load_barycentric(b, nir_intrinsic_load_barycentric_pixel,
                                     INTERP_MODE_SMOOTH);

      case SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID:
         return nir_load_barycentric(b, nir_intrinsic_load_barycentric_centroid,
                                     INTERP_MODE_SMOOTH);

      case SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE:
         return nir_load_barycentric(b, nir_intrinsic_load_barycentric_sample,
                                     INTERP_MODE_SMOOTH);

      case SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL:
         return nir_load_barycentric(b, nir_intrinsic_load_barycentric_model,
                                     INTERP_MODE_NONE);

      case SYSTEM_VALUE_HELPER_INVOCATION: {
         /* When the demote operation is used, reading HelperInvocation
          * needs Volatile memory access semantics to return the correct
          * (dynamic) value.  See OpDemoteToHelperInvocation.
          */
         if (nir_intrinsic_access(intrin) & ACCESS_VOLATILE)
            return nir_is_helper_invocation(b, 1);
         break;
      }

      case SYSTEM_VALUE_MESH_VIEW_INDICES:
         return nir_load_mesh_view_indices(b, intrin->dest.ssa.num_components,
               bit_size, column, .base = 0,
               .range = intrin->dest.ssa.num_components * bit_size / 8);

      default:
         break;
      }

      nir_intrinsic_op sysval_op =
         nir_intrinsic_from_system_value(var->data.location);
      if (glsl_type_is_matrix(var->type)) {
         assert(nir_intrinsic_infos[sysval_op].index_map[NIR_INTRINSIC_COLUMN] > 0);
         unsigned num_cols = glsl_get_matrix_columns(var->type);
         ASSERTED unsigned num_rows = glsl_get_vector_elements(var->type);
         assert(num_rows == intrin->dest.ssa.num_components);

         nir_ssa_def *cols[4];
         for (unsigned i = 0; i < num_cols; i++) {
            cols[i] = nir_load_system_value(b, sysval_op, i,
                                            intrin->dest.ssa.num_components,
                                            intrin->dest.ssa.bit_size);
            assert(cols[i]->num_components == num_rows);
         }
         return nir_select_from_ssa_def_array(b, cols, num_cols, column);
      } else {
         return nir_load_system_value(b, sysval_op, 0,
                                      intrin->dest.ssa.num_components,
                                      intrin->dest.ssa.bit_size);
      }
   }

   default:
      return NULL;
   }
}

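/* Lower loads of system-value variables (and a handful of system-value
 * intrinsics) to their corresponding NIR intrinsics, then drop the
 * now-unused system-value variables from the shader.
 */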
bool
nir_lower_system_values(nir_shader *shader)
{
   bool progress = nir_shader_lower_instructions(shader,
                                                 lower_system_value_filter,
                                                 lower_system_value_instr,
                                                 NULL);

   /* We're going to delete the variables so we need to clean up all those
    * derefs we left lying around.
    */
   if (progress)
      nir_remove_dead_derefs(shader);

   nir_foreach_variable_with_modes_safe(var, shader, nir_var_system_value)
      exec_node_remove(&var->node);

   return progress;
}

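/* Unflatten a linear index into a 3-component ID without using umod; see the
 * formula in the comment below.
 */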
static nir_ssa_def *
lower_id_to_index_no_umod(nir_builder *b, nir_ssa_def *index,
                          nir_ssa_def *size, unsigned bit_size)
{
   /* We lower the ID to a formula based on the Index:
    *
    *    id.z = index / (size.x * size.y)
    *    id.y = (index - (id.z * (size.x * size.y))) / size.x
    *    id.x = index - ((id.z * (size.x * size.y)) + (id.y * size.x))
    *
    * This is more efficient on hardware that doesn't have a
    * modulo instruction and when the size is either not known
    * at compile time or not a power of two.
    */

   nir_ssa_def *size_x = nir_channel(b, size, 0);
   nir_ssa_def *size_y = nir_channel(b, size, 1);
   nir_ssa_def *size_x_y = nir_imul(b, size_x, size_y);

   nir_ssa_def *id_z = nir_udiv(b, index, size_x_y);
   nir_ssa_def *z_portion = nir_imul(b, id_z, size_x_y);
   nir_ssa_def *id_y = nir_udiv(b, nir_isub(b, index, z_portion), size_x);
   nir_ssa_def *y_portion = nir_imul(b, id_y, size_x);
   nir_ssa_def *id_x = nir_isub(b, index, nir_iadd(b, z_portion, y_portion));

   return nir_u2u(b, nir_vec3(b, id_x, id_y, id_z), bit_size);
}


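/* Unflatten a linear index into a 3-component ID using umod/udiv; used for
 * deriving gl_LocalInvocationID from gl_LocalInvocationIndex.
 */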
static nir_ssa_def *
lower_id_to_index(nir_builder *b, nir_ssa_def *index, nir_ssa_def *size,
                  unsigned bit_size)
{
   /* We lower gl_LocalInvocationID to a formula based on
    * gl_LocalInvocationIndex:
    *
    *    id.x = index % size.x;
    *    id.y = (index / size.x) % size.y;
    *    id.z = (index / (size.x * size.y)) % size.z;
    *
    * However, the final % size.z does nothing unless we
    * accidentally end up with an index that is too large,
    * so it can safely be omitted.
    *
    * Because no hardware supports a local workgroup size greater than
    * about 1K, this calculation can be done in 32-bit and can save some
    * 64-bit arithmetic.
    */

   nir_ssa_def *size_x = nir_channel(b, size, 0);
   nir_ssa_def *size_y = nir_channel(b, size, 1);

   nir_ssa_def *id_x = nir_umod(b, index, size_x);
   nir_ssa_def *id_y = nir_umod(b, nir_udiv(b, index, size_x), size_y);
   nir_ssa_def *id_z = nir_udiv(b, index, nir_imul(b, size_x, size_y));

   return nir_u2u(b, nir_vec3(b, id_x, id_y, id_z), bit_size);
}

static bool
lower_compute_system_value_filter(const nir_instr *instr, const void *_state)
{
   return instr->type == nir_instr_type_intrinsic;
}

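/* Lower compute-stage system values (local/global invocation IDs and
 * indices, workgroup IDs and sizes) according to the shader compiler options
 * and the nir_lower_compute_system_values_options provided by the caller.
 */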
static nir_ssa_def *
lower_compute_system_value_instr(nir_builder *b,
                                 nir_instr *instr, void *_state)
{
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   struct lower_sysval_state *state = (struct lower_sysval_state *)_state;
   const nir_lower_compute_system_values_options *options = state->options;

   /* All the intrinsics we care about are loads */
   if (!nir_intrinsic_infos[intrin->intrinsic].has_dest)
      return NULL;

   assert(intrin->dest.is_ssa);
   const unsigned bit_size = intrin->dest.ssa.bit_size;

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_local_invocation_id:
      /* If lower_cs_local_id_to_index is true, then we replace
       * local_invocation_id with a formula based on local_invocation_index.
       */
      if (b->shader->options->lower_cs_local_id_to_index ||
          (options && options->lower_cs_local_id_to_index)) {
         nir_ssa_def *local_index = nir_load_local_invocation_index(b);
         nir_ssa_def *local_size = nir_load_workgroup_size(b);
         return lower_id_to_index(b, local_index, local_size, bit_size);
      }
      if (options && options->shuffle_local_ids_for_quad_derivatives &&
          b->shader->info.cs.derivative_group == DERIVATIVE_GROUP_QUADS &&
          _mesa_set_search(state->lower_once_list, instr) == NULL) {
         nir_ssa_def *ids = nir_load_local_invocation_id(b);
         _mesa_set_add(state->lower_once_list, ids->parent_instr);

         nir_ssa_def *x = nir_channel(b, ids, 0);
         nir_ssa_def *y = nir_channel(b, ids, 1);
         nir_ssa_def *z = nir_channel(b, ids, 2);
         unsigned size_x = b->shader->info.workgroup_size[0];
         nir_ssa_def *size_x_imm;

         if (b->shader->info.workgroup_size_variable)
            size_x_imm = nir_channel(b, nir_load_workgroup_size(b), 0);
         else
            size_x_imm = nir_imm_int(b, size_x);

         /* Remap indices from:
          *    | 0| 1| 2| 3|
          *    | 4| 5| 6| 7|
          *    | 8| 9|10|11|
          *    |12|13|14|15|
          * to:
          *    | 0| 1| 4| 5|
          *    | 2| 3| 6| 7|
          *    | 8| 9|12|13|
          *    |10|11|14|15|
          *
          * That's the layout required by AMD hardware for derivatives to
          * work. Other hardware may work differently.
          *
          * It's a classic tiling pattern that can be implemented by inserting
          * bit y[0] between bits x[0] and x[1] like this:
          *
          *    x[0],y[0],x[1],...x[last],y[1],...,y[last]
          *
          * If the width is a power of two, use:
          *    i = ((x & 1) | ((y & 1) << 1) | ((x & ~1) << 1)) | ((y & ~1) << logbase2(size_x))
          *
          * If the width is not a power of two or the local size is variable, use:
          *    i = ((x & 1) | ((y & 1) << 1) | ((x & ~1) << 1)) + ((y & ~1) * size_x)
          *
          * GL_NV_compute_shader_derivatives requires that the width and height
          * are multiples of two, which is also a requirement for the second
          * expression to work.
          *
          * The 2D result is: (x,y) = (i % w, i / w)
          */

         nir_ssa_def *one = nir_imm_int(b, 1);
         nir_ssa_def *inv_one = nir_imm_int(b, ~1);
         nir_ssa_def *x_bit0 = nir_iand(b, x, one);
         nir_ssa_def *y_bit0 = nir_iand(b, y, one);
         nir_ssa_def *x_bits_1n = nir_iand(b, x, inv_one);
         nir_ssa_def *y_bits_1n = nir_iand(b, y, inv_one);
         nir_ssa_def *bits_01 = nir_ior(b, x_bit0, nir_ishl(b, y_bit0, one));
         nir_ssa_def *bits_01x = nir_ior(b, bits_01,
                                         nir_ishl(b, x_bits_1n, one));
         nir_ssa_def *i;

         if (!b->shader->info.workgroup_size_variable &&
             util_is_power_of_two_nonzero(size_x)) {
            nir_ssa_def *log2_size_x = nir_imm_int(b, util_logbase2(size_x));
            i = nir_ior(b, bits_01x, nir_ishl(b, y_bits_1n, log2_size_x));
         } else {
            i = nir_iadd(b, bits_01x, nir_imul(b, y_bits_1n, size_x_imm));
         }

         /* This should be fast if size_x is an immediate or even a power
          * of two.
          */
         x = nir_umod(b, i, size_x_imm);
         y = nir_udiv(b, i, size_x_imm);

         return nir_vec3(b, x, y, z);
      }

      /* If a workgroup size dimension is 1, then the local invocation id must be zero. */
      nir_component_mask_t is_zero = 0;
      is_zero |= b->shader->info.workgroup_size[0] == 1 ? 0x1 : 0x0;
      is_zero |= b->shader->info.workgroup_size[1] == 1 ? 0x2 : 0x0;
      is_zero |= b->shader->info.workgroup_size[2] == 1 ? 0x4 : 0x0;
      if (!b->shader->info.workgroup_size_variable && is_zero) {
         nir_ssa_scalar defs[3];
         for (unsigned i = 0; i < 3; i++) {
            defs[i] = is_zero & (1 << i) ? nir_get_ssa_scalar(nir_imm_zero(b, 1, 32), 0) :
                                           nir_get_ssa_scalar(&intrin->dest.ssa, i);
         }
         return nir_vec_scalars(b, defs, 3);
      }

      return NULL;

   case nir_intrinsic_load_local_invocation_index:
      /* If lower_cs_local_index_to_id is true, then we replace
       * local_invocation_index with a formula based on local_invocation_id.
       */
      if (b->shader->options->lower_cs_local_index_to_id ||
          (options && options->lower_local_invocation_index)) {
         /* From the GLSL man page for gl_LocalInvocationIndex:
          *
          *    "The value of gl_LocalInvocationIndex is equal to
          *    gl_LocalInvocationID.z * gl_WorkGroupSize.x *
          *    gl_WorkGroupSize.y + gl_LocalInvocationID.y *
          *    gl_WorkGroupSize.x + gl_LocalInvocationID.x"
          */
         nir_ssa_def *local_id = nir_load_local_invocation_id(b);
         nir_ssa_def *local_size = nir_load_workgroup_size(b);
         nir_ssa_def *size_x = nir_channel(b, local_size, 0);
         nir_ssa_def *size_y = nir_channel(b, local_size, 1);

         /* Because no hardware supports a local workgroup size greater than
          * about 1K, this calculation can be done in 32-bit and can save some
          * 64-bit arithmetic.
          */
         nir_ssa_def *index;
         index = nir_imul(b, nir_channel(b, local_id, 2),
                             nir_imul(b, size_x, size_y));
         index = nir_iadd(b, index,
                             nir_imul(b, nir_channel(b, local_id, 1), size_x));
         index = nir_iadd(b, index, nir_channel(b, local_id, 0));
         return nir_u2u(b, index, bit_size);
      } else {
         return NULL;
      }

   case nir_intrinsic_load_workgroup_size:
      if (b->shader->info.workgroup_size_variable) {
         /* If the local work group size is variable it can't be lowered at
          * this point.  We do, however, have to make sure that the intrinsic
          * is only 32-bit.
          */
         return NULL;
      } else {
         /* Using a 32-bit constant is safe here as no device/driver needs
          * more than 32 bits for the local size */
         nir_const_value workgroup_size_const[3];
         memset(workgroup_size_const, 0, sizeof(workgroup_size_const));
         workgroup_size_const[0].u32 = b->shader->info.workgroup_size[0];
         workgroup_size_const[1].u32 = b->shader->info.workgroup_size[1];
         workgroup_size_const[2].u32 = b->shader->info.workgroup_size[2];
         return nir_u2u(b, nir_build_imm(b, 3, 32, workgroup_size_const), bit_size);
      }

   case nir_intrinsic_load_global_invocation_id_zero_base: {
      if ((options && options->has_base_workgroup_id) ||
          !b->shader->options->has_cs_global_id) {
         nir_ssa_def *group_size = nir_load_workgroup_size(b);
         nir_ssa_def *group_id = nir_load_workgroup_id(b, bit_size);
         nir_ssa_def *local_id = nir_load_local_invocation_id(b);

         return nir_iadd(b, nir_imul(b, group_id,
                                        nir_u2u(b, group_size, bit_size)),
                            nir_u2u(b, local_id, bit_size));
      } else {
         return NULL;
      }
   }

   case nir_intrinsic_load_global_invocation_id: {
      if (options && options->has_base_global_invocation_id)
         return nir_iadd(b, nir_load_global_invocation_id_zero_base(b, bit_size),
                            nir_load_base_global_invocation_id(b, bit_size));
      else if ((options && options->has_base_workgroup_id) ||
               !b->shader->options->has_cs_global_id)
         return nir_load_global_invocation_id_zero_base(b, bit_size);
      else
         return NULL;
   }

   case nir_intrinsic_load_global_invocation_index: {
      /* OpenCL's global_linear_id explicitly removes the global offset before computing this */
      assert(b->shader->info.stage == MESA_SHADER_KERNEL);
      nir_ssa_def *global_base_id = nir_load_base_global_invocation_id(b, bit_size);
      nir_ssa_def *global_id = nir_isub(b, nir_load_global_invocation_id(b, bit_size), global_base_id);
      nir_ssa_def *global_size = build_global_group_size(b, bit_size);

      /* index = id.x + ((id.y + (id.z * size.y)) * size.x) */
      nir_ssa_def *index;
      index = nir_imul(b, nir_channel(b, global_id, 2),
                          nir_channel(b, global_size, 1));
      index = nir_iadd(b, nir_channel(b, global_id, 1), index);
      index = nir_imul(b, nir_channel(b, global_size, 0), index);
      index = nir_iadd(b, nir_channel(b, global_id, 0), index);
      return index;
   }

   case nir_intrinsic_load_workgroup_id: {
      if (options && options->has_base_workgroup_id)
         return nir_iadd(b, nir_u2u(b, nir_load_workgroup_id_zero_base(b), bit_size),
                            nir_load_base_workgroup_id(b, bit_size));
      else if (options && options->lower_workgroup_id_to_index)
         return lower_id_to_index_no_umod(b, nir_load_workgroup_index(b),
                                          nir_load_num_workgroups(b, bit_size),
                                          bit_size);

      return NULL;
   }

   default:
      return NULL;
   }
}

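/* Lower compute system values according to the given options.  Only applies
 * to stages that use workgroups (see gl_shader_stage_uses_workgroup).
 */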
bool
nir_lower_compute_system_values(nir_shader *shader,
                                const nir_lower_compute_system_values_options *options)
{
   if (!gl_shader_stage_uses_workgroup(shader->info.stage))
      return false;

   struct lower_sysval_state state;
   state.options = options;
   state.lower_once_list = _mesa_pointer_set_create(NULL);

   bool progress =
      nir_shader_lower_instructions(shader,
                                    lower_compute_system_value_filter,
                                    lower_compute_system_value_instr,
                                    (void*)&state);
   ralloc_free(state.lower_once_list);

   /* Update this so as not to lower it again. */
   if (options && options->shuffle_local_ids_for_quad_derivatives &&
       shader->info.cs.derivative_group == DERIVATIVE_GROUP_QUADS)
      shader->info.cs.derivative_group = DERIVATIVE_GROUP_LINEAR;

   return progress;
}