/*
 * Copyright © 2021 Google, Inc.
 * SPDX-License-Identifier: MIT
 */

#include "ir3_nir.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"

/*
 * Lowering for 64b intrinsics generated with OpenCL or with
 * VK_KHR_buffer_device_address. All our intrinsics from a hw
 * standpoint are 32b, so we just need to combine in zero for
 * the upper 32bits and let the other nir passes clean up the mess.
 */
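
/*
 * For example (schematic NIR; the exact textual syntax is approximate), a
 * 64b vec2 SSBO load
 *
 *    vec2 64 %v = @load_ssbo (%block, %offset)
 *
 * is rebuilt as two 32b vec2 loads whose halves get packed back together:
 *
 *    vec2 32 %w0 = @load_ssbo (%block, %offset)
 *    vec2 32 %w1 = @load_ssbo (%block, %offset + 8)
 *    vec2 64 %v  = vec2 (pack_64_2x32 %w0), (pack_64_2x32 %w1)
 *
 * so only 32b memory accesses ever reach the backend.
 */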

static bool
lower_64b_intrinsics_filter(const nir_instr *instr, const void *unused)
{
   (void)unused;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   if (intr->intrinsic == nir_intrinsic_load_deref ||
       intr->intrinsic == nir_intrinsic_store_deref)
      return false;

   if (is_intrinsic_store(intr->intrinsic))
      return nir_src_bit_size(intr->src[0]) == 64;

   /* skip over ssbo atomics, we'll lower them later */
   if (intr->intrinsic == nir_intrinsic_ssbo_atomic ||
       intr->intrinsic == nir_intrinsic_ssbo_atomic_swap ||
       intr->intrinsic == nir_intrinsic_global_atomic_ir3 ||
       intr->intrinsic == nir_intrinsic_global_atomic_swap_ir3)
      return false;

   if (nir_intrinsic_dest_components(intr) == 0)
      return false;

   return intr->def.bit_size == 64;
}

static nir_def *
lower_64b_intrinsics(nir_builder *b, nir_instr *instr, void *unused)
{
   (void)unused;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   /* We could be *slightly* more clever and, for ex, turn a 64b vec4
    * load into two 32b vec4 loads, rather than 4 32b vec2 loads.
    */
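
   /* Illustrative sketch (schematic NIR; syntax approximate): a 64b vec2
    * SSBO store becomes one 32b vec2 store per written 64b component, with
    * the byte offset advancing by 8 each time:
    *
    *    @store_ssbo (unpack_64_2x32 %v.x, %block, %offset)      wrmask=xy
    *    @store_ssbo (unpack_64_2x32 %v.y, %block, %offset + 8)  wrmask=xy
    */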

   if (is_intrinsic_store(intr->intrinsic)) {
      unsigned offset_src_idx;
      switch (intr->intrinsic) {
      case nir_intrinsic_store_ssbo:
      case nir_intrinsic_store_global_ir3:
      case nir_intrinsic_store_per_view_output:
         offset_src_idx = 2;
         break;
      default:
         offset_src_idx = 1;
      }

      unsigned num_comp = nir_intrinsic_src_components(intr, 0);
      unsigned wrmask = nir_intrinsic_has_write_mask(intr) ?
         nir_intrinsic_write_mask(intr) : BITSET_MASK(num_comp);
      nir_def *val = intr->src[0].ssa;
      nir_def *off = intr->src[offset_src_idx].ssa;

      for (unsigned i = 0; i < num_comp; i++) {
         if (!(wrmask & BITFIELD_BIT(i)))
            continue;

         nir_def *c64 = nir_channel(b, val, i);
         nir_def *c32 = nir_unpack_64_2x32(b, c64);

         nir_intrinsic_instr *store =
            nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intr->instr));
         store->num_components = 2;
         store->src[0] = nir_src_for_ssa(c32);
         store->src[offset_src_idx] = nir_src_for_ssa(off);

         if (nir_intrinsic_has_write_mask(intr))
            nir_intrinsic_set_write_mask(store, 0x3);
         nir_builder_instr_insert(b, &store->instr);

         off = nir_iadd_imm(b, off, 8);
      }

      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   }

   unsigned num_comp = nir_intrinsic_dest_components(intr);

   nir_def *def = &intr->def;
   def->bit_size = 32;

   /* load_kernel_input is handled specially, lowering to two 32b inputs:
    */
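
   /* Illustrative sketch (schematic NIR; syntax approximate):
    *
    *    64 %x = @load_kernel_input (%off)
    *
    * becomes the original load narrowed to 32b plus a second 32b load of
    * the upper half at %off + 4:
    *
    *    32 %lo = @load_kernel_input (%off)
    *    32 %hi = @load_kernel_input (%off + 4)
    *    64 %x  = pack_64_2x32_split %lo, %hi
    */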
   if (intr->intrinsic == nir_intrinsic_load_kernel_input) {
      assert(num_comp == 1);

      nir_def *offset = nir_iadd_imm(b, intr->src[0].ssa, 4);

      nir_def *upper = nir_load_kernel_input(b, 1, 32, offset);

      return nir_pack_64_2x32_split(b, def, upper);
   }

   nir_def *components[num_comp];

   if (is_intrinsic_load(intr->intrinsic)) {
      unsigned offset_src_idx;
      switch (intr->intrinsic) {
      case nir_intrinsic_load_ssbo:
      case nir_intrinsic_load_ubo:
      case nir_intrinsic_load_global_ir3:
      case nir_intrinsic_load_per_view_output:
         offset_src_idx = 1;
         break;
      default:
         offset_src_idx = 0;
      }

      nir_def *off = intr->src[offset_src_idx].ssa;

      for (unsigned i = 0; i < num_comp; i++) {
         nir_intrinsic_instr *load =
            nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intr->instr));
         load->num_components = 2;
         load->src[offset_src_idx] = nir_src_for_ssa(off);

         nir_def_init(&load->instr, &load->def, 2, 32);
         nir_builder_instr_insert(b, &load->instr);

         components[i] = nir_pack_64_2x32(b, &load->def);

         off = nir_iadd_imm(b, off, 8);
      }
   } else {
      /* The remaining (non load/store) intrinsics just get zero-
       * extended from 32b to 64b:
       */
      for (unsigned i = 0; i < num_comp; i++) {
         nir_def *c = nir_channel(b, def, i);
         components[i] = nir_pack_64_2x32_split(b, c, nir_imm_zero(b, 1, 32));
      }
   }

   return nir_build_alu_src_arr(b, nir_op_vec(num_comp), components);
}

bool
ir3_nir_lower_64b_intrinsics(nir_shader *shader)
{
   return nir_shader_lower_instructions(
         shader, lower_64b_intrinsics_filter,
         lower_64b_intrinsics, NULL);
}

/*
 * Lowering for 64b undef instructions, splitting them into two 32b undefs.
 */
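
/*
 * Illustrative sketch (schematic NIR; syntax approximate): each 64b undef
 * component is rebuilt from a fresh 32b vec2 undef:
 *
 *    64 %u = undef   ->   vec2 32 %t = undef
 *                         64      %u = pack_64_2x32_split %t.x, %t.y
 */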

static nir_def *
lower_64b_undef(nir_builder *b, nir_instr *instr, void *unused)
{
   (void)unused;

   nir_undef_instr *undef = nir_instr_as_undef(instr);
   unsigned num_comp = undef->def.num_components;
   nir_def *components[num_comp];

   for (unsigned i = 0; i < num_comp; i++) {
      nir_def *lowered = nir_undef(b, 2, 32);

      components[i] = nir_pack_64_2x32_split(b,
                                             nir_channel(b, lowered, 0),
                                             nir_channel(b, lowered, 1));
   }

   return nir_build_alu_src_arr(b, nir_op_vec(num_comp), components);
}

static bool
lower_64b_undef_filter(const nir_instr *instr, const void *unused)
{
   (void)unused;

   return instr->type == nir_instr_type_undef &&
      nir_instr_as_undef(instr)->def.bit_size == 64;
}

bool
ir3_nir_lower_64b_undef(nir_shader *shader)
{
   return nir_shader_lower_instructions(
         shader, lower_64b_undef_filter,
         lower_64b_undef, NULL);
}

/*
 * Lowering for load_global/store_global with 64b addresses to ir3
 * variants, which instead take a uvec2_32
 */
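
/*
 * Illustrative sketch (schematic NIR; syntax approximate): the 64b address
 * is unpacked into a uvec2 and the access is rewritten to the _ir3 variant,
 * which takes that uvec2 base plus a separate small offset:
 *
 *    32 %x = @load_global (%addr64)
 * ->
 *    vec2 32 %base = unpack_64_2x32 %addr64
 *    32      %x    = @load_global_ir3 (%base, 0)
 */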

static bool
lower_64b_global_filter(const nir_instr *instr, const void *unused)
{
   (void)unused;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_store_global:
   case nir_intrinsic_global_atomic:
   case nir_intrinsic_global_atomic_swap:
      return true;
   default:
      return false;
   }
}

static nir_def *
lower_64b_global(nir_builder *b, nir_instr *instr, void *unused)
{
   (void)unused;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   bool load = intr->intrinsic != nir_intrinsic_store_global;

   nir_def *addr64 = intr->src[load ? 0 : 1].ssa;
   nir_def *addr = nir_unpack_64_2x32(b, addr64);

   /*
    * Note that we can get vec8/vec16 with OpenCL.. we need to split
    * those up into max 4 components per load/store.
    */
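
   /* Illustrative sketch: a 64b-address vec8 load is emitted below as two
    * vec4 @load_global_ir3 at component offsets 0 and 4, with the results
    * stitched back into a single vec8; stores are split the same way using
    * nir_channels() to select each up-to-4-component slice.
    */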

   if (intr->intrinsic == nir_intrinsic_global_atomic) {
      return nir_global_atomic_ir3(
         b, intr->def.bit_size, addr,
         intr->src[1].ssa,
         .atomic_op = nir_intrinsic_atomic_op(intr));
   } else if (intr->intrinsic == nir_intrinsic_global_atomic_swap) {
      return nir_global_atomic_swap_ir3(
         b, intr->def.bit_size, addr,
         intr->src[1].ssa, intr->src[2].ssa,
         .atomic_op = nir_intrinsic_atomic_op(intr));
   }

   if (load) {
      unsigned num_comp = nir_intrinsic_dest_components(intr);
      nir_def *components[num_comp];
      for (unsigned off = 0; off < num_comp;) {
         unsigned c = MIN2(num_comp - off, 4);
         nir_def *val = nir_load_global_ir3(
               b, c, intr->def.bit_size,
               addr, nir_imm_int(b, off));
         for (unsigned i = 0; i < c; i++) {
            components[off++] = nir_channel(b, val, i);
         }
      }
      return nir_build_alu_src_arr(b, nir_op_vec(num_comp), components);
   } else {
      unsigned num_comp = nir_intrinsic_src_components(intr, 0);
      nir_def *value = intr->src[0].ssa;
      for (unsigned off = 0; off < num_comp; off += 4) {
         unsigned c = MIN2(num_comp - off, 4);
         nir_def *v = nir_channels(b, value, BITFIELD_MASK(c) << off);
         nir_store_global_ir3(b, v, addr, nir_imm_int(b, off));
      }
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   }
}

bool
ir3_nir_lower_64b_global(nir_shader *shader)
{
   return nir_shader_lower_instructions(
         shader, lower_64b_global_filter,
         lower_64b_global, NULL);
}

/*
 * Lowering for 64b registers:
 * - @decl_reg -> split in two 32b ones
 * - @store_reg -> unpack_64_2x32_split_x/y and two separate stores
 * - @load_reg -> two separate loads and pack_64_2x32_split
 */
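
/*
 * Illustrative sketch (schematic NIR; syntax approximate) for a scalar 64b
 * register %reg, which is split into %reg_lo / %reg_hi:
 *
 *    @store_reg (%v, %reg)     becomes
 *       @store_reg (unpack_64_2x32_split_x %v, %reg_lo)
 *       @store_reg (unpack_64_2x32_split_y %v, %reg_hi)
 *
 *    64 %v = @load_reg (%reg)  becomes
 *       32 %lo = @load_reg (%reg_lo)
 *       32 %hi = @load_reg (%reg_hi)
 *       64 %v  = pack_64_2x32_split %lo, %hi
 */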

static void
lower_64b_reg(nir_builder *b, nir_intrinsic_instr *reg)
{
   unsigned num_components = nir_intrinsic_num_components(reg);
   unsigned num_array_elems = nir_intrinsic_num_array_elems(reg);

   nir_def *reg_hi = nir_decl_reg(b, num_components, 32, num_array_elems);
   nir_def *reg_lo = nir_decl_reg(b, num_components, 32, num_array_elems);

   nir_foreach_reg_store_safe (store_reg_src, reg) {
      nir_intrinsic_instr *store =
         nir_instr_as_intrinsic(nir_src_parent_instr(store_reg_src));
      b->cursor = nir_before_instr(&store->instr);

      nir_def *packed = store->src[0].ssa;
      nir_def *unpacked_lo = nir_unpack_64_2x32_split_x(b, packed);
      nir_def *unpacked_hi = nir_unpack_64_2x32_split_y(b, packed);
      int base = nir_intrinsic_base(store);

      if (store->intrinsic == nir_intrinsic_store_reg) {
         nir_build_store_reg(b, unpacked_lo, reg_lo, .base = base);
         nir_build_store_reg(b, unpacked_hi, reg_hi, .base = base);
      } else {
         assert(store->intrinsic == nir_intrinsic_store_reg_indirect);

         nir_def *offset = store->src[2].ssa;
         nir_store_reg_indirect(b, unpacked_lo, reg_lo, offset, .base = base);
         nir_store_reg_indirect(b, unpacked_hi, reg_hi, offset, .base = base);
      }

      nir_instr_remove(&store->instr);
   }

   nir_foreach_reg_load_safe (load_reg_src, reg) {
      nir_intrinsic_instr *load =
         nir_instr_as_intrinsic(nir_src_parent_instr(load_reg_src));
      b->cursor = nir_before_instr(&load->instr);

      int base = nir_intrinsic_base(load);
      nir_def *load_lo, *load_hi;

      if (load->intrinsic == nir_intrinsic_load_reg) {
         load_lo =
            nir_build_load_reg(b, num_components, 32, reg_lo, .base = base);
         load_hi =
            nir_build_load_reg(b, num_components, 32, reg_hi, .base = base);
      } else {
         assert(load->intrinsic == nir_intrinsic_load_reg_indirect);

         nir_def *offset = load->src[1].ssa;
         load_lo = nir_load_reg_indirect(b, num_components, 32, reg_lo, offset,
                                         .base = base);
         load_hi = nir_load_reg_indirect(b, num_components, 32, reg_hi, offset,
                                         .base = base);
      }

      nir_def *packed = nir_pack_64_2x32_split(b, load_lo, load_hi);
      nir_def_rewrite_uses(&load->def, packed);
      nir_instr_remove(&load->instr);
   }

   nir_instr_remove(&reg->instr);
}

bool
ir3_nir_lower_64b_regs(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function_impl (impl, shader) {
      bool impl_progress = false;
      nir_builder b = nir_builder_create(impl);

      nir_foreach_reg_decl_safe (reg, impl) {
         if (nir_intrinsic_bit_size(reg) == 64) {
            lower_64b_reg(&b, reg);
            impl_progress = true;
         }
      }

      if (impl_progress) {
         nir_metadata_preserve(impl, nir_metadata_control_flow);
         progress = true;
      }
   }

   return progress;
}