/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

/** @file nir_lower_io_to_scalar.c
 *
 * Replaces nir_load_input/nir_store_output operations that have
 * num_components != 1 with individual per-channel operations.
 */
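
/*
 * For example (illustrative NIR, exact textual syntax may vary), a vec4
 * input load such as
 *
 *    vec4 32 ssa_2 = intrinsic load_input (ssa_1) (base=0, component=0, ...)
 *
 * is rewritten into four single-component load_input intrinsics with
 * component=0..3, whose results are recombined with a vec4 ALU op that
 * replaces all uses of the original destination.
 */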

static void
set_io_semantics(nir_intrinsic_instr *scalar_intr,
                 nir_intrinsic_instr *vec_intr, unsigned component)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(vec_intr);
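   /* gs_streams packs a 2-bit stream index for each of the four components;
    * keep only the stream belonging to the channel being extracted.
    */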
   sem.gs_streams = (sem.gs_streams >> (component * 2)) & 0x3;
   nir_intrinsic_set_io_semantics(scalar_intr, sem);
}

static void
lower_load_input_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
{
   b->cursor = nir_before_instr(&intr->instr);

   assert(intr->dest.is_ssa);

   nir_ssa_def *loads[NIR_MAX_VEC_COMPONENTS];

   for (unsigned i = 0; i < intr->num_components; i++) {
      nir_intrinsic_instr *chan_intr =
         nir_intrinsic_instr_create(b->shader, intr->intrinsic);
      nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest,
                        1, intr->dest.ssa.bit_size, NULL);
      chan_intr->num_components = 1;

      nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr));
      nir_intrinsic_set_component(chan_intr, nir_intrinsic_component(intr) + i);
      nir_intrinsic_set_dest_type(chan_intr, nir_intrinsic_dest_type(intr));
      set_io_semantics(chan_intr, intr, i);
      /* offset and vertex (if needed) */
      for (unsigned j = 0; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; ++j)
         nir_src_copy(&chan_intr->src[j], &intr->src[j]);

      nir_builder_instr_insert(b, &chan_intr->instr);

      loads[i] = &chan_intr->dest.ssa;
   }

   nir_ssa_def_rewrite_uses(&intr->dest.ssa,
                            nir_vec(b, loads, intr->num_components));
   nir_instr_remove(&intr->instr);
}

static void
lower_load_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
{
   b->cursor = nir_before_instr(&intr->instr);

   assert(intr->dest.is_ssa);

   nir_ssa_def *loads[NIR_MAX_VEC_COMPONENTS];
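   /* load_shared takes the offset as its only source, while load_ubo and
    * load_ssbo take a buffer index first, so their offset lives in src[1].
    */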
   unsigned offset_idx = intr->intrinsic == nir_intrinsic_load_shared ? 0 : 1;
   nir_ssa_def *base_offset = intr->src[offset_idx].ssa;

   for (unsigned i = 0; i < intr->num_components; i++) {
      nir_intrinsic_instr *chan_intr =
         nir_intrinsic_instr_create(b->shader, intr->intrinsic);
      nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest,
                        1, intr->dest.ssa.bit_size, NULL);
      chan_intr->num_components = 1;

      nir_intrinsic_set_align_offset(chan_intr,
                                     (nir_intrinsic_align_offset(intr) +
                                      i * (intr->dest.ssa.bit_size / 8)) % nir_intrinsic_align_mul(intr));
      nir_intrinsic_set_align_mul(chan_intr, nir_intrinsic_align_mul(intr));
      if (nir_intrinsic_has_access(intr))
         nir_intrinsic_set_access(chan_intr, nir_intrinsic_access(intr));
      if (nir_intrinsic_has_range(intr))
         nir_intrinsic_set_range(chan_intr, nir_intrinsic_range(intr));
      if (nir_intrinsic_has_range_base(intr))
         nir_intrinsic_set_range_base(chan_intr, nir_intrinsic_range_base(intr));
      if (nir_intrinsic_has_base(intr))
         nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr));
      for (unsigned j = 0; j < nir_intrinsic_infos[intr->intrinsic].num_srcs - 1; j++)
         nir_src_copy(&chan_intr->src[j], &intr->src[j]);

      /* increment offset per component */
      nir_ssa_def *offset = nir_iadd_imm(b, base_offset, i * (intr->dest.ssa.bit_size / 8));
      chan_intr->src[offset_idx] = nir_src_for_ssa(offset);

      nir_builder_instr_insert(b, &chan_intr->instr);

      loads[i] = &chan_intr->dest.ssa;
   }

   nir_ssa_def_rewrite_uses(&intr->dest.ssa,
                            nir_vec(b, loads, intr->num_components));
   nir_instr_remove(&intr->instr);
}

static void
lower_store_output_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
{
   b->cursor = nir_before_instr(&intr->instr);

   nir_ssa_def *value = nir_ssa_for_src(b, intr->src[0], intr->num_components);

   for (unsigned i = 0; i < intr->num_components; i++) {
      if (!(nir_intrinsic_write_mask(intr) & (1 << i)))
         continue;

      nir_intrinsic_instr *chan_intr =
         nir_intrinsic_instr_create(b->shader, intr->intrinsic);
      chan_intr->num_components = 1;

      nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr));
      nir_intrinsic_set_write_mask(chan_intr, 0x1);
      nir_intrinsic_set_component(chan_intr, nir_intrinsic_component(intr) + i);
      nir_intrinsic_set_src_type(chan_intr, nir_intrinsic_src_type(intr));
      set_io_semantics(chan_intr, intr, i);

      if (nir_intrinsic_has_io_xfb(intr)) {
         /* Scalarize transform feedback info. */
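         /* nir_io_xfb describes components 0-1 and nir_io_xfb2 components
          * 2-3.  Walk the lower components to find the xfb output whose
          * range covers this channel, then emit a single-component copy of
          * it at the channel's new component index.
          */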
         unsigned component = nir_intrinsic_component(chan_intr);

         for (unsigned c = 0; c <= component; c++) {
            nir_io_xfb xfb = c < 2 ? nir_intrinsic_io_xfb(intr) :
                                     nir_intrinsic_io_xfb2(intr);

            if (component < c + xfb.out[c % 2].num_components) {
               nir_io_xfb scalar_xfb;

               memset(&scalar_xfb, 0, sizeof(scalar_xfb));
               scalar_xfb.out[component % 2].num_components = 1;
               scalar_xfb.out[component % 2].buffer = xfb.out[c % 2].buffer;
               scalar_xfb.out[component % 2].offset = xfb.out[c % 2].offset +
                                                      component - c;
               if (component < 2)
                  nir_intrinsic_set_io_xfb(chan_intr, scalar_xfb);
               else
                  nir_intrinsic_set_io_xfb2(chan_intr, scalar_xfb);
               break;
            }
         }
      }

      /* value */
      chan_intr->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
      /* offset and vertex (if needed) */
      for (unsigned j = 1; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; ++j)
         nir_src_copy(&chan_intr->src[j], &intr->src[j]);

      nir_builder_instr_insert(b, &chan_intr->instr);
   }

   nir_instr_remove(&intr->instr);
}

static void
lower_store_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
{
   b->cursor = nir_before_instr(&intr->instr);

   nir_ssa_def *value = nir_ssa_for_src(b, intr->src[0], intr->num_components);
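   /* store_shared sources are (value, offset); store_ssbo has a buffer index
    * in between, so its offset is src[2].
    */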
   unsigned offset_idx = intr->intrinsic == nir_intrinsic_store_shared ? 1 : 2;
   nir_ssa_def *base_offset = intr->src[offset_idx].ssa;

   /* iterate wrmask instead of num_components to handle split components */
   u_foreach_bit(i, nir_intrinsic_write_mask(intr)) {
      nir_intrinsic_instr *chan_intr =
         nir_intrinsic_instr_create(b->shader, intr->intrinsic);
      chan_intr->num_components = 1;

      nir_intrinsic_set_write_mask(chan_intr, 0x1);
      nir_intrinsic_set_align_offset(chan_intr,
                                     (nir_intrinsic_align_offset(intr) +
                                      i * (value->bit_size / 8)) % nir_intrinsic_align_mul(intr));
      nir_intrinsic_set_align_mul(chan_intr, nir_intrinsic_align_mul(intr));
      if (nir_intrinsic_has_access(intr))
         nir_intrinsic_set_access(chan_intr, nir_intrinsic_access(intr));
      if (nir_intrinsic_has_base(intr))
         nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr));

      /* value */
      chan_intr->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
      for (unsigned j = 1; j < nir_intrinsic_infos[intr->intrinsic].num_srcs - 1; j++)
         nir_src_copy(&chan_intr->src[j], &intr->src[j]);

      /* increment offset per component */
      nir_ssa_def *offset = nir_iadd_imm(b, base_offset, i * (value->bit_size / 8));
      chan_intr->src[offset_idx] = nir_src_for_ssa(offset);

      nir_builder_instr_insert(b, &chan_intr->instr);
   }

   nir_instr_remove(&intr->instr);
}

static bool
nir_lower_io_to_scalar_instr(nir_builder *b, nir_instr *instr, void *data)
{
   nir_variable_mode mask = *(nir_variable_mode *)data;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   if (intr->num_components == 1)
      return false;

   if ((intr->intrinsic == nir_intrinsic_load_input ||
        intr->intrinsic == nir_intrinsic_load_per_vertex_input) &&
       (mask & nir_var_shader_in)) {
      lower_load_input_to_scalar(b, intr);
      return true;
   }

   if (intr->intrinsic == nir_intrinsic_load_per_vertex_output &&
       (mask & nir_var_shader_out)) {
      lower_load_input_to_scalar(b, intr);
      return true;
   }

   if ((intr->intrinsic == nir_intrinsic_load_ubo && (mask & nir_var_mem_ubo)) ||
       (intr->intrinsic == nir_intrinsic_load_ssbo && (mask & nir_var_mem_ssbo)) ||
       (intr->intrinsic == nir_intrinsic_load_shared && (mask & nir_var_mem_shared))) {
      lower_load_to_scalar(b, intr);
      return true;
   }

   if ((intr->intrinsic == nir_intrinsic_store_output ||
        intr->intrinsic == nir_intrinsic_store_per_vertex_output) &&
       (mask & nir_var_shader_out)) {
      lower_store_output_to_scalar(b, intr);
      return true;
   }

   if ((intr->intrinsic == nir_intrinsic_store_ssbo && (mask & nir_var_mem_ssbo)) ||
       (intr->intrinsic == nir_intrinsic_store_shared && (mask & nir_var_mem_shared))) {
      lower_store_to_scalar(b, intr);
      return true;
   }

   return false;
}

void
nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask)
{
   nir_shader_instructions_pass(shader,
                                nir_lower_io_to_scalar_instr,
                                nir_metadata_block_index |
                                nir_metadata_dominance,
                                &mask);
}

static nir_variable **
get_channel_variables(struct hash_table *ht, nir_variable *var)
{
   nir_variable **chan_vars;
   struct hash_entry *entry = _mesa_hash_table_search(ht, var);
   if (!entry) {
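      /* One channel variable slot per vec4 component of the slot. */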
      chan_vars = (nir_variable **) calloc(4, sizeof(nir_variable *));
      _mesa_hash_table_insert(ht, var, chan_vars);
   } else {
      chan_vars = (nir_variable **) entry->data;
   }

   return chan_vars;
}

/*
 * Note that the src deref that we are cloning is the head of the
 * chain of deref instructions from the original intrinsic, but
 * the dst we are cloning to is the tail (because chains of deref
 * instructions are created back to front).
 */
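/*
 * For example (illustrative, assuming a hypothetical two-dimensional array
 * input "in"): to rewrite an access to in[i][j] against a per-channel
 * variable, the caller builds deref_var(chan_var) and this helper re-applies
 * the array indices in order, yielding
 * deref_array(deref_array(deref_var(chan_var), i), j).
 */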

static nir_deref_instr *
clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
                  const nir_deref_instr *src_head)
{
   const nir_deref_instr *parent = nir_deref_instr_parent(src_head);

   if (!parent)
      return dst_tail;

   assert(src_head->deref_type == nir_deref_type_array);

   dst_tail = clone_deref_array(b, dst_tail, parent);

   return nir_build_deref_array(b, dst_tail,
                                nir_ssa_for_src(b, src_head->arr.index, 1));
}

static void
lower_load_to_scalar_early(nir_builder *b, nir_intrinsic_instr *intr,
                           nir_variable *var, struct hash_table *split_inputs,
                           struct hash_table *split_outputs)
{
   b->cursor = nir_before_instr(&intr->instr);

   assert(intr->dest.is_ssa);

   nir_ssa_def *loads[NIR_MAX_VEC_COMPONENTS];

   nir_variable **chan_vars;
   if (var->data.mode == nir_var_shader_in) {
      chan_vars = get_channel_variables(split_inputs, var);
   } else {
      chan_vars = get_channel_variables(split_outputs, var);
   }

   for (unsigned i = 0; i < intr->num_components; i++) {
      nir_variable *chan_var = chan_vars[var->data.location_frac + i];
      if (!chan_vars[var->data.location_frac + i]) {
         chan_var = nir_variable_clone(var, b->shader);
         chan_var->data.location_frac = var->data.location_frac + i;
         chan_var->type = glsl_channel_type(chan_var->type);

         chan_vars[var->data.location_frac + i] = chan_var;

         nir_shader_add_variable(b->shader, chan_var);
      }

      nir_intrinsic_instr *chan_intr =
         nir_intrinsic_instr_create(b->shader, intr->intrinsic);
      nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest,
                        1, intr->dest.ssa.bit_size, NULL);
      chan_intr->num_components = 1;

      nir_deref_instr *deref = nir_build_deref_var(b, chan_var);

      deref = clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));

      chan_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);

      if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset ||
          intr->intrinsic == nir_intrinsic_interp_deref_at_sample ||
          intr->intrinsic == nir_intrinsic_interp_deref_at_vertex)
         nir_src_copy(&chan_intr->src[1], &intr->src[1]);

      nir_builder_instr_insert(b, &chan_intr->instr);

      loads[i] = &chan_intr->dest.ssa;
   }

   nir_ssa_def_rewrite_uses(&intr->dest.ssa,
                            nir_vec(b, loads, intr->num_components));

   /* Remove the old load intrinsic */
   nir_instr_remove(&intr->instr);
}

static void
lower_store_output_to_scalar_early(nir_builder *b, nir_intrinsic_instr *intr,
                                   nir_variable *var,
                                   struct hash_table *split_outputs)
{
   b->cursor = nir_before_instr(&intr->instr);

   nir_ssa_def *value = nir_ssa_for_src(b, intr->src[1], intr->num_components);

   nir_variable **chan_vars = get_channel_variables(split_outputs, var);
   for (unsigned i = 0; i < intr->num_components; i++) {
      if (!(nir_intrinsic_write_mask(intr) & (1 << i)))
         continue;

      nir_variable *chan_var = chan_vars[var->data.location_frac + i];
      if (!chan_vars[var->data.location_frac + i]) {
         chan_var = nir_variable_clone(var, b->shader);
         chan_var->data.location_frac = var->data.location_frac + i;
         chan_var->type = glsl_channel_type(chan_var->type);

         chan_vars[var->data.location_frac + i] = chan_var;

         nir_shader_add_variable(b->shader, chan_var);
      }

      nir_intrinsic_instr *chan_intr =
         nir_intrinsic_instr_create(b->shader, intr->intrinsic);
      chan_intr->num_components = 1;

      nir_intrinsic_set_write_mask(chan_intr, 0x1);

      nir_deref_instr *deref = nir_build_deref_var(b, chan_var);

      deref = clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));

      chan_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
      chan_intr->src[1] = nir_src_for_ssa(nir_channel(b, value, i));

      nir_builder_instr_insert(b, &chan_intr->instr);
   }

   /* Remove the old store intrinsic */
   nir_instr_remove(&intr->instr);
}

struct io_to_scalar_early_state {
   struct hash_table *split_inputs, *split_outputs;
   nir_variable_mode mask;
};

static bool
nir_lower_io_to_scalar_early_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct io_to_scalar_early_state *state = data;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   if (intr->num_components == 1)
      return false;

   if (intr->intrinsic != nir_intrinsic_load_deref &&
       intr->intrinsic != nir_intrinsic_store_deref &&
       intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
       intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
       intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
       intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   if (!nir_deref_mode_is_one_of(deref, state->mask))
      return false;

   nir_variable *var = nir_deref_instr_get_variable(deref);
   nir_variable_mode mode = var->data.mode;

   /* TODO: add patch support */
   if (var->data.patch)
      return false;

   /* TODO: add doubles support */
   if (glsl_type_is_64bit(glsl_without_array(var->type)))
      return false;

   if (!(b->shader->info.stage == MESA_SHADER_VERTEX &&
         mode == nir_var_shader_in) &&
       var->data.location < VARYING_SLOT_VAR0 &&
       var->data.location >= 0)
      return false;

   /* Don't bother splitting if we can't opt away any unused
    * components.
    */
   if (var->data.always_active_io)
      return false;

   if (var->data.must_be_shader_input)
      return false;

   /* Skip types we cannot split */
   if (glsl_type_is_matrix(glsl_without_array(var->type)) ||
       glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
      return false;

   switch (intr->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_deref:
      if ((state->mask & nir_var_shader_in && mode == nir_var_shader_in) ||
          (state->mask & nir_var_shader_out && mode == nir_var_shader_out)) {
         lower_load_to_scalar_early(b, intr, var, state->split_inputs,
                                    state->split_outputs);
         return true;
      }
      break;
   case nir_intrinsic_store_deref:
      if (state->mask & nir_var_shader_out &&
          mode == nir_var_shader_out) {
         lower_store_output_to_scalar_early(b, intr, var, state->split_outputs);
         return true;
      }
      break;
   default:
      break;
   }

   return false;
}

/*
 * This pass is intended to run earlier than nir_lower_io_to_scalar(), i.e.
 * before nir_lower_io() has been called and while I/O is still expressed
 * through variable derefs.
 */
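/*
 * A minimal usage sketch (illustrative; "type_size" stands for a
 * driver-provided glsl_type size callback and the surrounding passes vary
 * per driver):
 *
 *    nir_lower_io_to_scalar_early(nir, nir_var_shader_in | nir_var_shader_out);
 *    ...
 *    nir_lower_io(nir, nir_var_shader_in | nir_var_shader_out, type_size, 0);
 *    nir_lower_io_to_scalar(nir, nir_var_shader_in | nir_var_shader_out);
 */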
bool
nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask)
{
   struct io_to_scalar_early_state state = {
      .split_inputs = _mesa_pointer_hash_table_create(NULL),
      .split_outputs = _mesa_pointer_hash_table_create(NULL),
      .mask = mask
   };

   bool progress = nir_shader_instructions_pass(shader,
                                                nir_lower_io_to_scalar_early_instr,
                                                nir_metadata_block_index |
                                                nir_metadata_dominance,
                                                &state);

   /* Remove the old input variables from the shader's inputs list */
   hash_table_foreach(state.split_inputs, entry) {
      nir_variable *var = (nir_variable *) entry->key;
      exec_node_remove(&var->node);

      free(entry->data);
   }

   /* Remove the old output variables from the shader's outputs list */
   hash_table_foreach(state.split_outputs, entry) {
      nir_variable *var = (nir_variable *) entry->key;
      exec_node_remove(&var->node);

      free(entry->data);
   }

   _mesa_hash_table_destroy(state.split_inputs, NULL);
   _mesa_hash_table_destroy(state.split_outputs, NULL);

   nir_remove_dead_derefs(shader);

   return progress;
}