/*
 * Copyright © 2018 Intel Corporation
 * Copyright © 2023 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/bitscan.h"
#include "util/u_math.h"
#include "nir_builder.h"

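/* Emit a copy of the given load/store intrinsic with a new offset source,
 * alignment, and size.  For stores, `data` replaces the value source; for
 * loads, a new destination of the requested size is created.  All other
 * sources and const indices are carried over unchanged.
 */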
static nir_intrinsic_instr *
dup_mem_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                  nir_def *offset,
                  unsigned align_mul, unsigned align_offset,
                  nir_def *data,
                  unsigned num_components, unsigned bit_size)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];

   nir_intrinsic_instr *dup =
      nir_intrinsic_instr_create(b->shader, intrin->intrinsic);

   nir_src *intrin_offset_src = nir_get_io_offset_src(intrin);
   for (unsigned i = 0; i < info->num_srcs; i++) {
      if (i == 0 && data != NULL) {
         assert(!info->has_dest);
         assert(&intrin->src[i] != intrin_offset_src);
         dup->src[i] = nir_src_for_ssa(data);
      } else if (&intrin->src[i] == intrin_offset_src) {
         dup->src[i] = nir_src_for_ssa(offset);
      } else {
         dup->src[i] = nir_src_for_ssa(intrin->src[i].ssa);
      }
   }

   dup->num_components = num_components;
   for (unsigned i = 0; i < info->num_indices; i++)
      dup->const_index[i] = intrin->const_index[i];

   nir_intrinsic_set_align(dup, align_mul, align_offset);

   if (info->has_dest) {
      nir_def_init(&dup->instr, &dup->def, num_components, bit_size);
   } else {
      nir_intrinsic_set_write_mask(dup, (1 << num_components) - 1);
   }

   nir_builder_instr_insert(b, &dup->instr);

   return dup;
}

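/* Shift dword-aligned load data down by the unaligned byte offset using
 * AMD's alignbyte instruction, which extracts a 32-bit window from a pair
 * of adjacent components.  The last component only has itself to borrow
 * from.
 */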
static nir_def *
shift_load_data_alignbyte_amd(nir_builder *b, nir_def *load, nir_def *offset)
{
   /* We don't need to mask the offset by 0x3 because only the low 2 bits matter. */
   nir_def *comps[NIR_MAX_VEC_COMPONENTS];
   unsigned i = 0;
   for (; i < load->num_components - 1; i++)
      comps[i] = nir_alignbyte_amd(b, nir_channel(b, load, i + 1), nir_channel(b, load, i), offset);

   /* Shift the last element. */
   comps[i] = nir_alignbyte_amd(b, nir_channel(b, load, i), nir_channel(b, load, i), offset);

   return nir_vec(b, comps, load->num_components);
}

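/* Shift dword load data down by the unaligned byte offset by packing each
 * pair of adjacent 32-bit components into a 64-bit value and doing a single
 * 64-bit right shift per pair.
 */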
static nir_def *
shift_load_data_shift64(nir_builder *b, nir_def *load, nir_def *offset, uint64_t align_mask)
{
   nir_def *comps[NIR_MAX_VEC_COMPONENTS];
   nir_def *shift = nir_imul_imm(b, nir_iand_imm(b, offset, 0x3), 8);

   for (unsigned i = 0; i < load->num_components - 1; i++) {
      nir_def *qword = nir_pack_64_2x32_split(
         b, nir_channel(b, load, i), nir_channel(b, load, i + 1));
      qword = nir_ushr(b, qword, shift);
      comps[i] = nir_unpack_64_2x32_split_x(b, qword);
      if (i == load->num_components - 2)
         comps[i + 1] = nir_unpack_64_2x32_split_y(b, qword);
   }

   return nir_vec(b, comps, load->num_components);
}

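/* Generic fallback: shift each component down by the misaligned byte count
 * (in bits) and OR in the low bits of the following component.  The bcsel
 * avoids the undefined full-width reverse shift when the offset is already
 * aligned.
 */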
static nir_def *
shift_load_data_scalar(nir_builder *b, nir_def *load, nir_def *offset, uint64_t align_mask)
{
   nir_def *pad = nir_iand_imm(b, offset, align_mask);
   nir_def *shift = nir_imul_imm(b, pad, 8);

   nir_def *shifted = nir_ushr(b, load, shift);

   if (load->num_components > 1) {
      nir_def *rev_shift =
         nir_isub_imm(b, load->bit_size, shift);
      nir_def *rev_shifted = nir_ishl(b, load, rev_shift);

      nir_def *comps[NIR_MAX_VEC_COMPONENTS];
      for (unsigned i = 1; i < load->num_components; i++)
         comps[i - 1] = nir_channel(b, rev_shifted, i);

      comps[load->num_components - 1] =
         nir_imm_zero(b, 1, load->bit_size);

      rev_shifted = nir_vec(b, comps, load->num_components);
      shifted = nir_bcsel(b, nir_ieq_imm(b, shift, 0), load,
                          nir_ior(b, shifted, rev_shifted));
   }
   return shifted;
}

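/* Pick a shift strategy for unaligned load data based on the requested
 * nir_mem_access_shift_method and whether the load's bit size, alignment,
 * and component count support it.
 */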
static nir_def *
shift_load_data(nir_builder *b, nir_def *load, nir_def *offset, uint64_t align_mask,
                nir_mem_access_shift_method method)
{
   bool use_alignbyte = method == nir_mem_access_shift_method_bytealign_amd &&
                        load->bit_size == 32 && align_mask == 0x3;
   bool use_shift64 =
      method == nir_mem_access_shift_method_shift64 && load->bit_size == 32 && align_mask == 0x3 &&
      load->num_components >= 2;

   offset = nir_u2u32(b, offset);

   if (use_alignbyte)
      return shift_load_data_alignbyte_amd(b, load, offset);
   else if (use_shift64)
      return shift_load_data_shift64(b, load, offset, align_mask);
   else
      return shift_load_data_scalar(b, load, offset, align_mask);
}

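/* Lower a single load whose size, component count, or alignment is not
 * supported (as reported by the callback) into one or more loads that are,
 * then stitch the requested value back together with nir_extract_bits.
 * Under-aligned chunks are loaded from a rounded-down offset and shifted.
 */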
static bool
lower_mem_load(nir_builder *b, nir_intrinsic_instr *intrin,
               nir_lower_mem_access_bit_sizes_cb mem_access_size_align_cb,
               const void *cb_data)
{
   const unsigned bit_size = intrin->def.bit_size;
   const unsigned num_components = intrin->def.num_components;
   const unsigned bytes_read = num_components * (bit_size / 8);
   const uint32_t align_mul = nir_intrinsic_align_mul(intrin);
   const uint32_t whole_align_offset = nir_intrinsic_align_offset(intrin);
   const uint32_t whole_align = nir_intrinsic_align(intrin);
   const enum gl_access_qualifier access =
      nir_intrinsic_has_access(intrin) ? nir_intrinsic_access(intrin) : 0;
   nir_src *offset_src = nir_get_io_offset_src(intrin);
   const bool offset_is_const = nir_src_is_const(*offset_src);
   nir_def *offset = offset_src->ssa;

   nir_mem_access_size_align requested =
      mem_access_size_align_cb(intrin->intrinsic, bytes_read,
                               bit_size, align_mul, whole_align_offset,
                               offset_is_const, access, cb_data);

   assert(requested.num_components > 0);
   assert(requested.bit_size > 0);
   assert(util_is_power_of_two_nonzero(align_mul));
   assert(util_is_power_of_two_nonzero(requested.align));
   if (requested.num_components == num_components &&
       requested.bit_size == bit_size &&
       requested.align <= whole_align)
      return false;

   /* Otherwise, we have to break it into chunks.  We could end up with as
    * many as 32 chunks if we're loading a u64vec16 as individual dwords.
    */
   nir_def *chunks[32];
   unsigned num_chunks = 0;
   unsigned chunk_start = 0;
   while (chunk_start < bytes_read) {
      const unsigned bytes_left = bytes_read - chunk_start;
      const uint32_t chunk_align_offset =
         (whole_align_offset + chunk_start) % align_mul;
      const uint32_t chunk_align =
         nir_combined_align(align_mul, chunk_align_offset);
      requested = mem_access_size_align_cb(intrin->intrinsic, bytes_left,
                                           bit_size, align_mul, chunk_align_offset,
                                           offset_is_const, access, cb_data);

      unsigned chunk_bytes;
      assert(requested.num_components > 0);
      assert(requested.bit_size > 0);
      assert(util_is_power_of_two_nonzero(requested.align));
      if (align_mul < requested.align) {
         /* For this case, we need to be able to shift the value so we assume
          * the alignment is less than the size of a single component.  This
          * ensures that we don't need to upcast in order to shift.
          */
         assert(requested.bit_size >= requested.align * 8);

         uint64_t align_mask = requested.align - 1;
         nir_def *chunk_offset = nir_iadd_imm(b, offset, chunk_start);
         nir_def *aligned_offset = nir_iand_imm(b, chunk_offset, ~align_mask);

         nir_intrinsic_instr *load =
            dup_mem_intrinsic(b, intrin, aligned_offset,
                              requested.align, 0, NULL,
                              requested.num_components, requested.bit_size);

         unsigned max_pad = requested.align - chunk_align;
         unsigned requested_bytes =
            requested.num_components * requested.bit_size / 8;
         chunk_bytes = MIN2(bytes_left, requested_bytes - max_pad);

         nir_def *shifted = shift_load_data(
            b, &load->def, chunk_offset, align_mask, requested.shift);

         unsigned chunk_bit_size = MIN2(8 << (ffs(chunk_bytes) - 1), bit_size);
         unsigned chunk_num_components = chunk_bytes / (chunk_bit_size / 8);

         /* There's no guarantee that chunk_num_components is a valid NIR
          * vector size, so just loop one chunk component at a time
          */
         for (unsigned i = 0; i < chunk_num_components; i++) {
            assert(num_chunks < ARRAY_SIZE(chunks));
            chunks[num_chunks++] =
               nir_extract_bits(b, &shifted, 1, i * chunk_bit_size,
                                1, chunk_bit_size);
         }
      } else if (chunk_align_offset % requested.align) {
         /* In this case, we know how much to adjust the offset */
         uint32_t delta = chunk_align_offset % requested.align;
         nir_def *load_offset =
            nir_iadd_imm(b, offset, chunk_start - (int)delta);

         const uint32_t load_align_offset =
            (chunk_align_offset - delta) % align_mul;

         nir_intrinsic_instr *load =
            dup_mem_intrinsic(b, intrin, load_offset,
                              align_mul, load_align_offset, NULL,
                              requested.num_components, requested.bit_size);

         assert(requested.bit_size >= 8);
         chunk_bytes = requested.num_components * (requested.bit_size / 8);
         assert(chunk_bytes > delta);
         chunk_bytes -= delta;

         unsigned chunk_bit_size = MIN2(8 << (ffs(chunk_bytes) - 1), bit_size);
         unsigned chunk_num_components = chunk_bytes / (chunk_bit_size / 8);

         /* There's no guarantee that chunk_num_components is a valid NIR
          * vector size, so just loop one chunk component at a time
          */
         nir_def *chunk_data = &load->def;
         for (unsigned i = 0; i < chunk_num_components; i++) {
            assert(num_chunks < ARRAY_SIZE(chunks));
            chunks[num_chunks++] =
               nir_extract_bits(b, &chunk_data, 1,
                                delta * 8 + i * chunk_bit_size,
                                1, chunk_bit_size);
         }
      } else {
         nir_def *chunk_offset = nir_iadd_imm(b, offset, chunk_start);
         nir_intrinsic_instr *load =
            dup_mem_intrinsic(b, intrin, chunk_offset,
                              align_mul, chunk_align_offset, NULL,
                              requested.num_components, requested.bit_size);

         chunk_bytes = requested.num_components * (requested.bit_size / 8);
         assert(num_chunks < ARRAY_SIZE(chunks));
         chunks[num_chunks++] = &load->def;
      }

      chunk_start += chunk_bytes;
   }

   nir_def *result = nir_extract_bits(b, chunks, num_chunks, 0,
                                      num_components, bit_size);
   nir_def_replace(&intrin->def, result);

   return true;
}

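/* Lower a single store into supported-size chunks, walking a byte mask
 * derived from the writemask.  Chunks that cannot be stored directly because
 * of their size or alignment are written with a pair of 32-bit iand/ior
 * atomics (or a load/modify/store sequence for scratch).
 */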
static bool
lower_mem_store(nir_builder *b, nir_intrinsic_instr *intrin,
                nir_lower_mem_access_bit_sizes_cb mem_access_size_align_cb,
                const void *cb_data, bool allow_unaligned_stores_as_atomics)
{
   nir_def *value = intrin->src[0].ssa;

   assert(intrin->num_components == value->num_components);
   const unsigned bit_size = value->bit_size;
   const unsigned byte_size = bit_size / 8;
   const unsigned num_components = intrin->num_components;
   const unsigned bytes_written = num_components * byte_size;
   const uint32_t align_mul = nir_intrinsic_align_mul(intrin);
   const uint32_t whole_align_offset = nir_intrinsic_align_offset(intrin);
   const uint32_t whole_align = nir_intrinsic_align(intrin);
   const enum gl_access_qualifier access =
      nir_intrinsic_has_access(intrin) ? nir_intrinsic_access(intrin) : 0;
   nir_src *offset_src = nir_get_io_offset_src(intrin);
   const bool offset_is_const = nir_src_is_const(*offset_src);
   nir_def *offset = offset_src->ssa;

   nir_component_mask_t writemask = nir_intrinsic_write_mask(intrin);
   assert(writemask < (1 << num_components));

   nir_mem_access_size_align requested =
      mem_access_size_align_cb(intrin->intrinsic, bytes_written,
                               bit_size, align_mul, whole_align_offset,
                               offset_is_const, access, cb_data);

   assert(requested.num_components > 0);
   assert(requested.bit_size > 0);
   assert(util_is_power_of_two_nonzero(align_mul));
   assert(util_is_power_of_two_nonzero(requested.align));
   if (requested.num_components == num_components &&
       requested.bit_size == bit_size &&
       requested.align <= whole_align &&
       writemask == BITFIELD_MASK(num_components))
      return false;

   assert(byte_size <= sizeof(uint64_t));
   BITSET_DECLARE(mask, NIR_MAX_VEC_COMPONENTS * sizeof(uint64_t));
   BITSET_ZERO(mask);

   for (unsigned i = 0; i < num_components; i++) {
      if (writemask & (1u << i)) {
         BITSET_SET_RANGE_INSIDE_WORD(mask, i * byte_size,
                                      ((i + 1) * byte_size) - 1);
      }
   }

   while (BITSET_FFS(mask) != 0) {
      const uint32_t chunk_start = BITSET_FFS(mask) - 1;

      uint32_t end;
      for (end = chunk_start + 1; end < bytes_written; end++) {
         if (!(BITSET_TEST(mask, end)))
            break;
      }
      /* The size of the current contiguous chunk in bytes */
      const uint32_t max_chunk_bytes = end - chunk_start;
      const uint32_t chunk_align_offset =
         (whole_align_offset + chunk_start) % align_mul;
      const uint32_t chunk_align =
         nir_combined_align(align_mul, chunk_align_offset);

      requested = mem_access_size_align_cb(intrin->intrinsic, max_chunk_bytes,
                                           bit_size, align_mul, chunk_align_offset,
                                           offset_is_const, access, cb_data);

      uint32_t chunk_bytes = requested.num_components * (requested.bit_size / 8);

      assert(requested.num_components > 0);
      assert(requested.bit_size > 0);
      assert(util_is_power_of_two_nonzero(requested.align));
      if (chunk_align < requested.align ||
          chunk_bytes > max_chunk_bytes) {
         /* Otherwise the caller made a mistake with their return values. */
         assert(chunk_bytes <= 4);
         assert(allow_unaligned_stores_as_atomics ||
                intrin->intrinsic == nir_intrinsic_store_scratch);

         /* We'll turn this into a pair of 32-bit atomics to modify only the right
          * bits of memory.
          */
         requested = (nir_mem_access_size_align){
            .align = 4,
            .bit_size = 32,
            .num_components = 1,
         };

         uint64_t align_mask = requested.align - 1;
         nir_def *chunk_offset = nir_iadd_imm(b, offset, chunk_start);
         nir_def *pad = chunk_align < 4 ?
            nir_iand_imm(b, chunk_offset, align_mask) :
            nir_imm_intN_t(b, 0, chunk_offset->bit_size);
         chunk_offset = nir_iand_imm(b, chunk_offset, ~align_mask);

         unsigned max_pad = chunk_align < requested.align ?
            requested.align - chunk_align : 0;
         unsigned requested_bytes =
            requested.num_components * requested.bit_size / 8;
         chunk_bytes = MIN2(max_chunk_bytes, requested_bytes - max_pad);
         unsigned chunk_bits = chunk_bytes * 8;

         nir_def *data;
         if (chunk_bits == 24) {
            /* This is a bit of a special case because we don't have 24-bit integers */
            data = nir_extract_bits(b, &value, 1, chunk_start * 8, 3, 8);
            data = nir_pack_bits(b, nir_pad_vector_imm_int(b, data, 0, 4), 32);
         } else {
            data = nir_extract_bits(b, &value, 1, chunk_start * 8, 1, chunk_bits);
            data = nir_u2u32(b, data);
         }

         nir_def *iand_mask = nir_imm_int(b, (1 << chunk_bits) - 1);

         if (chunk_align < requested.align) {
            nir_def *shift = nir_u2u32(b, nir_imul_imm(b, pad, 8));
            data = nir_ishl(b, data, shift);
            iand_mask = nir_ishl(b, iand_mask, shift);
         }

         iand_mask = nir_inot(b, iand_mask);

         switch (intrin->intrinsic) {
         case nir_intrinsic_store_ssbo:
            nir_ssbo_atomic(b, 32, intrin->src[1].ssa, chunk_offset, iand_mask,
                            .atomic_op = nir_atomic_op_iand,
                            .access = nir_intrinsic_access(intrin));
            nir_ssbo_atomic(b, 32, intrin->src[1].ssa, chunk_offset, data,
                            .atomic_op = nir_atomic_op_ior,
                            .access = nir_intrinsic_access(intrin));
            break;
         case nir_intrinsic_store_global:
            nir_global_atomic(b, 32, chunk_offset, iand_mask,
                              .atomic_op = nir_atomic_op_iand);
            nir_global_atomic(b, 32, chunk_offset, data,
                              .atomic_op = nir_atomic_op_ior);
            break;
         case nir_intrinsic_store_shared:
            nir_shared_atomic(b, 32, chunk_offset, iand_mask,
                              .atomic_op = nir_atomic_op_iand,
                              .base = nir_intrinsic_base(intrin));
            nir_shared_atomic(b, 32, chunk_offset, data,
                              .atomic_op = nir_atomic_op_ior,
                              .base = nir_intrinsic_base(intrin));
            break;
         case nir_intrinsic_store_scratch: {
            nir_def *value = nir_load_scratch(b, 1, 32, chunk_offset);
            value = nir_iand(b, value, iand_mask);
            value = nir_ior(b, value, data);
            nir_store_scratch(b, value, chunk_offset);
            break;
         }
         default:
            unreachable("Unsupported unaligned store");
         }
      } else {
         nir_def *packed = nir_extract_bits(b, &value, 1, chunk_start * 8,
                                            requested.num_components,
                                            requested.bit_size);

         nir_def *chunk_offset = nir_iadd_imm(b, offset, chunk_start);
         dup_mem_intrinsic(b, intrin, chunk_offset,
                           align_mul, chunk_align_offset, packed,
                           requested.num_components, requested.bit_size);
      }
      BITSET_CLEAR_RANGE(mask, chunk_start, (chunk_start + chunk_bytes - 1));
   }

   nir_instr_remove(&intrin->instr);

   return true;
}

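/* Map a memory intrinsic to the variable mode(s) it accesses so it can be
 * filtered against the modes requested in the options.
 */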
static nir_variable_mode
intrin_to_variable_mode(nir_intrinsic_op intrin)
{
   switch (intrin) {
   case nir_intrinsic_load_kernel_input:
      return nir_var_uniform;

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_ldc_nv:
   case nir_intrinsic_ldcx_nv:
      return nir_var_mem_ubo;

   case nir_intrinsic_load_push_constant:
      return nir_var_mem_push_const;

   case nir_intrinsic_load_global:
   case nir_intrinsic_store_global:
      return nir_var_mem_global;

   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_constant:
      return nir_var_mem_constant;

   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_store_ssbo:
      return nir_var_mem_ssbo;

   case nir_intrinsic_load_shared:
   case nir_intrinsic_store_shared:
      return nir_var_mem_shared;

   case nir_intrinsic_load_scratch:
   case nir_intrinsic_store_scratch:
      return nir_var_shader_temp | nir_var_function_temp;

   case nir_intrinsic_load_task_payload:
   case nir_intrinsic_store_task_payload:
      return nir_var_mem_task_payload;

   default:
      return 0;
   }
}

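/* Per-instruction callback: dispatch loads and stores in the requested
 * modes to the lowering helpers above.
 */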
static bool
lower_mem_access_instr(nir_builder *b, nir_instr *instr, void *_data)
{
   const nir_lower_mem_access_bit_sizes_options *state = _data;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (!(state->modes & intrin_to_variable_mode(intrin->intrinsic)))
      return false;

   b->cursor = nir_after_instr(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_load_task_payload:
   case nir_intrinsic_ldc_nv:
   case nir_intrinsic_ldcx_nv:
   case nir_intrinsic_load_kernel_input:
      return lower_mem_load(b, intrin, state->callback, state->cb_data);

   case nir_intrinsic_store_global:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_shared:
   case nir_intrinsic_store_scratch:
   case nir_intrinsic_store_task_payload:
      return lower_mem_store(b, intrin, state->callback, state->cb_data,
                             state->may_lower_unaligned_stores_to_atomics);

   default:
      return false;
   }
}

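/* Entry point: rewrite memory access intrinsics whose bit size, component
 * count, or alignment the backend cannot handle, as described by
 * options->callback.
 */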
bool
nir_lower_mem_access_bit_sizes(nir_shader *shader,
                               const nir_lower_mem_access_bit_sizes_options *options)
{
   return nir_shader_instructions_pass(shader, lower_mem_access_instr,
                                       nir_metadata_control_flow,
                                       (void *)options);
}